VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 107631

Last change on this file since 107631 was 107209, checked in by vboxsync, 7 weeks ago

VMM/IEM: Reworked the disassembly annotations when using our own disassembler. [scm fix] jiraref:VBP-1466

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 457.3 KB
1/* $Id: IEMAllN8veRecompiler.cpp 107209 2024-12-02 10:36:49Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#define VBOX_DIS_WITH_ARMV8
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/dbgf.h>
55#include <VBox/vmm/tm.h>
56#include "IEMInternal.h"
57#include <VBox/vmm/vmcc.h>
58#include <VBox/log.h>
59#include <VBox/err.h>
60#include <VBox/dis.h>
61#include <VBox/param.h>
62#include <iprt/assert.h>
63#include <iprt/mem.h>
64#include <iprt/string.h>
65#if defined(RT_ARCH_AMD64)
66# include <iprt/x86.h>
67#elif defined(RT_ARCH_ARM64)
68# include <iprt/armv8.h>
69#endif
70
71#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
72# include "/opt/local/include/capstone/capstone.h"
73#endif
74
75#include "IEMInline.h"
76#include "IEMThreadedFunctions.h"
77#include "IEMN8veRecompiler.h"
78#include "IEMN8veRecompilerEmit.h"
79#include "IEMN8veRecompilerTlbLookup.h"
80#include "IEMNativeFunctions.h"
81#include "target-x86/IEMAllN8veEmit-x86.h"
82
83
84/*
85 * Narrow down configs here to avoid wasting time on unused configs.
86 * Note! Same checks in IEMAllThrdRecompiler.cpp.
87 */
88
89#ifndef IEM_WITH_CODE_TLB
90# error The code TLB must be enabled for the recompiler.
91#endif
92
93#ifndef IEM_WITH_DATA_TLB
94# error The data TLB must be enabled for the recompiler.
95#endif
96
97#ifndef IEM_WITH_SETJMP
98# error The setjmp approach must be enabled for the recompiler.
99#endif
100
101/** @todo eliminate this clang build hack. */
102#if RT_CLANG_PREREQ(4, 0)
103# pragma GCC diagnostic ignored "-Wunused-function"
104#endif
105
106
107/*********************************************************************************************************************************
108* Internal Functions *
109*********************************************************************************************************************************/
110#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
111static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
112#endif
113DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
114DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
115 IEMNATIVEGSTREG enmGstReg, uint32_t off);
116DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
117static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
118
119
120
121/*********************************************************************************************************************************
122* Native Recompilation *
123*********************************************************************************************************************************/
124
125
126/**
127 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
128 */
129IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
130{
131 pVCpu->iem.s.cInstructions += idxInstr;
132 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
133}
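/*
 * Editorial note: idxInstr appears to be the number of instructions already
 * executed in the current TB, so it is simply added to the instruction counter
 * before the status is run through the common fiddling.  VINF_IEM_REEXEC_BREAK
 * is mapped to VINF_SUCCESS first, presumably because the break status itself
 * needs no fiddling.
 */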
134
135
136/**
137 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
138 */
139DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
140{
141 uint64_t fCpu = pVCpu->fLocalForcedActions;
142 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
143 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
144 | VMCPU_FF_TLB_FLUSH
145 | VMCPU_FF_UNHALT );
146 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
147 if (RT_LIKELY( ( !fCpu
148 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
149 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
150 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
151 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
152 return false;
153 return true;
154}
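/*
 * Editorial note: the mask above drops force flags that do not require leaving
 * the TB (CR3 syncing, TLB flushing, unhalting).  A break is then signalled when
 * something other than a plain APIC/PIC interrupt remains pending, when a pending
 * APIC/PIC interrupt is actually deliverable (IF set and not in an interrupt
 * shadow), or when any VM-wide force flag is set.
 */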
155
156
157/**
158 * Used by TB code at the end of a TB to look up the next TB in the given lookup table entry and jump directly to it when possible.
159 */
160template<bool const a_fWithIrqCheck>
161IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
162 uint32_t fFlags, RTGCPHYS GCPhysPc))
163{
164 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
165 Assert(idxTbLookup < pTb->cTbLookupEntries);
166 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
167#if 1
168 PIEMTB const pNewTb = *ppNewTb;
169 if (pNewTb)
170 {
171# ifdef VBOX_STRICT
172 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
173 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
174 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
175 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
176 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
177 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
178# endif
179 if (pNewTb->GCPhysPc == GCPhysPc)
180 {
181# ifdef VBOX_STRICT
182 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
183 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
184 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
185 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
186 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
187# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
188 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
189# else
190 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
191 {
192 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
193 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
194 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
195 }
196# endif
197 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
198 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
199 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
200#endif
201
202 /*
203 * Check the flags + type against the next TB.
204 */
205 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
206 {
207 /*
208 * Check for interrupts and stuff.
209 */
210 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
211 * The main problems are the statistics and, to some degree, the logging. :/ */
212 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
213 {
214 /* Do polling. */
215 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
216 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
217 {
218 /*
219 * Success. Update statistics and switch to the next TB.
220 */
221 if (a_fWithIrqCheck)
222 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
223 else
224 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
225
226 pNewTb->cUsed += 1;
227 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
228 pVCpu->iem.s.pCurTbR3 = pNewTb;
229 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
230 pVCpu->iem.s.cTbExecNative += 1;
231 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
232 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
233 return (uintptr_t)pNewTb->Native.paInstructions;
234 }
235 }
236 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
237 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
238 }
239 else
240 {
241 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
242 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
243 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
244 }
245 }
246 else
247 {
248 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
249 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
250 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
251 }
252 }
253 else
254 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
255#else
256 NOREF(GCPhysPc);
257#endif
258
259 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
260 return 0;
261}
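/*
 * Editorial note: the return value doubles as the dispatch target - a non-zero
 * value is the address of the next TB's native code, which the generated code
 * presumably jumps to directly, while zero sends execution back to the outer
 * dispatcher with ppTbLookupEntryR3 pointing at the missed lookup entry.
 */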
262
263
264/**
265 * Used by TB code at the end of a TB to look up the next TB, recomputing the physical PC via the code TLB, and jump directly to it when possible.
266 */
267template <bool const a_fWithIrqCheck>
268IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
269{
270 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
271 Assert(idxTbLookup < pTb->cTbLookupEntries);
272 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
273#if 1
274 PIEMTB const pNewTb = *ppNewTb;
275 if (pNewTb)
276 {
277 /*
278 * Calculate the flags for the next TB and check if they match.
279 */
280 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
281 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
282 { /* likely */ }
283 else
284 {
285 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
286 fFlags |= IEMTB_F_INHIBIT_SHADOW;
287 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
288 fFlags |= IEMTB_F_INHIBIT_NMI;
289 }
290 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
291 {
292 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
293 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
294 { /* likely */ }
295 else
296 fFlags |= IEMTB_F_CS_LIM_CHECKS;
297 }
298 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
299
300 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
301 {
302 /*
303 * Do the TLB lookup for flat RIP and compare the result with the next TB.
304 *
305 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
306 */
307 /* Calc the effective PC. */
308 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
309 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
310 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
311
312 /* Advance within the current buffer (PAGE) when possible. */
313 RTGCPHYS GCPhysPc;
314 uint64_t off;
315 if ( pVCpu->iem.s.pbInstrBuf
316 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
317 {
318 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
319 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
320 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
321 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
322 else
323 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
324 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
325 }
326 else
327 {
328 pVCpu->iem.s.pbInstrBuf = NULL;
329 pVCpu->iem.s.offCurInstrStart = 0;
330 pVCpu->iem.s.offInstrNextByte = 0;
331 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
332 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
333 }
334
335 if (pNewTb->GCPhysPc == GCPhysPc)
336 {
337 /*
338 * Check for interrupts and stuff.
339 */
340 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
341 * The main problems are the statistics and, to some degree, the logging. :/ */
342 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
343 {
344 /* Do polling. */
345 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
346 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
347 {
348 /*
349 * Success. Update statistics and switch to the next TB.
350 */
351 if (a_fWithIrqCheck)
352 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
353 else
354 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
355
356 pNewTb->cUsed += 1;
357 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
358 pVCpu->iem.s.pCurTbR3 = pNewTb;
359 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
360 pVCpu->iem.s.cTbExecNative += 1;
361 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
362 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
363 return (uintptr_t)pNewTb->Native.paInstructions;
364 }
365 }
366 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
367 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
368 }
369 else
370 {
371 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
372 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
373 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
374 }
375 }
376 else
377 {
378 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
379 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
380 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
381 }
382 }
383 else
384 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
385#else
386 NOREF(fFlags);
387 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
388#endif
389
390 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
391 return 0;
392}
393
394
395/**
396 * Used by TB code when it wants to raise a \#DE.
397 */
398IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
399{
400 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
401 iemRaiseDivideErrorJmp(pVCpu);
402#ifndef _MSC_VER
403 return VINF_IEM_RAISED_XCPT; /* not reached */
404#endif
405}
406
407
408/**
409 * Used by TB code when it wants to raise a \#UD.
410 */
411IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
412{
413 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
414 iemRaiseUndefinedOpcodeJmp(pVCpu);
415#ifndef _MSC_VER
416 return VINF_IEM_RAISED_XCPT; /* not reached */
417#endif
418}
419
420
421/**
422 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
423 *
424 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
425 */
426IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
427{
428 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
429 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
430 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
431 iemRaiseUndefinedOpcodeJmp(pVCpu);
432 else
433 iemRaiseDeviceNotAvailableJmp(pVCpu);
434#ifndef _MSC_VER
435 return VINF_IEM_RAISED_XCPT; /* not reached */
436#endif
437}
438
439
440/**
441 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
442 *
443 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
444 */
445IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
446{
447 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
448 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
449 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
450 iemRaiseUndefinedOpcodeJmp(pVCpu);
451 else
452 iemRaiseDeviceNotAvailableJmp(pVCpu);
453#ifndef _MSC_VER
454 return VINF_IEM_RAISED_XCPT; /* not reached */
455#endif
456}
457
458
459/**
460 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
461 *
462 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
463 */
464IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
465{
466 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
467 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
468 iemRaiseSimdFpExceptionJmp(pVCpu);
469 else
470 iemRaiseUndefinedOpcodeJmp(pVCpu);
471#ifndef _MSC_VER
472 return VINF_IEM_RAISED_XCPT; /* not reached */
473#endif
474}
475
476
477/**
478 * Used by TB code when it wants to raise a \#NM.
479 */
480IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
481{
482 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
483 iemRaiseDeviceNotAvailableJmp(pVCpu);
484#ifndef _MSC_VER
485 return VINF_IEM_RAISED_XCPT; /* not reached */
486#endif
487}
488
489
490/**
491 * Used by TB code when it wants to raise a \#GP(0).
492 */
493IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
494{
495 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
496 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
497#ifndef _MSC_VER
498 return VINF_IEM_RAISED_XCPT; /* not reached */
499#endif
500}
501
502
503/**
504 * Used by TB code when it wants to raise a \#MF.
505 */
506IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
507{
508 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
509 iemRaiseMathFaultJmp(pVCpu);
510#ifndef _MSC_VER
511 return VINF_IEM_RAISED_XCPT; /* not reached */
512#endif
513}
514
515
516/**
517 * Used by TB code when it wants to raise a \#XF.
518 */
519IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
520{
521 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
522 iemRaiseSimdFpExceptionJmp(pVCpu);
523#ifndef _MSC_VER
524 return VINF_IEM_RAISED_XCPT; /* not reached */
525#endif
526}
527
528
529/**
530 * Used by TB code when detecting opcode changes.
531 * @see iemThreadedFuncWorkerObsoleteTb
532 */
533IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
534{
535 /* We set fSafeToFree to false because we're being called in the context
536 of a TB callback function, which for native TBs means we cannot release
537 the executable memory until we've returned all the way back to iemTbExec,
538 as that return path goes via the native code generated for the TB. */
539 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
540 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
541 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
542 return VINF_IEM_REEXEC_BREAK;
543}
544
545
546/**
547 * Used by TB code when we need to switch to a TB with CS.LIM checking.
548 */
549IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
550{
551 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
552 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
553 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
554 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
555 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
556 return VINF_IEM_REEXEC_BREAK;
557}
558
559
560/**
561 * Used by TB code when we missed a PC check after a branch.
562 */
563IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
564{
565 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
566 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
567 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
568 pVCpu->iem.s.pbInstrBuf));
569 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
570 return VINF_IEM_REEXEC_BREAK;
571}
572
573
574
575/*********************************************************************************************************************************
576* Helpers: Segmented memory fetches and stores. *
577*********************************************************************************************************************************/
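/*
 * Editorial note on the pattern used by the helpers below: when
 * IEMNATIVE_WITH_TLB_LOOKUP_FETCH (and the matching _STORE/_PUSH/_POP defines)
 * is set, the TB code presumably performs the TLB lookup inline and only calls
 * these helpers on the slow path, hence the *SafeJmp workers; otherwise the
 * helpers go through the regular *Jmp workers, which do the lookup themselves.
 */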
578
579/**
580 * Used by TB code to load unsigned 8-bit data w/ segmentation.
581 */
582IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
583{
584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
585 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
586#else
587 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
588#endif
589}
590
591
592/**
593 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
594 * to 16 bits.
595 */
596IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
597{
598#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
599 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
600#else
601 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
602#endif
603}
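/*
 * Editorial note: the cast chain above first sign-extends the fetched byte to
 * the target width and then zero-extends the result into the 64-bit return
 * value.  For a fetched byte of 0x80:
 *      (int8_t)0x80          -> -128
 *      (int16_t)-128         -> 0xFF80 when viewed as uint16_t
 *      (uint64_t)0xFF80      -> 0x000000000000FF80
 * The _Sx_U32 and _Sx_U64 variants below follow the same scheme with wider
 * intermediate types.
 */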
604
605
606/**
607 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
608 * to 32 bits.
609 */
610IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
611{
612#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
613 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
614#else
615 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
616#endif
617}
618
619/**
620 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
621 * to 64 bits.
622 */
623IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
624{
625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
626 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
627#else
628 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
629#endif
630}
631
632
633/**
634 * Used by TB code to load unsigned 16-bit data w/ segmentation.
635 */
636IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
637{
638#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
639 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
640#else
641 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
642#endif
643}
644
645
646/**
647 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
648 * to 32 bits.
649 */
650IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
651{
652#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
653 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
654#else
655 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
656#endif
657}
658
659
660/**
661 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
662 * to 64 bits.
663 */
664IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
665{
666#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
667 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
668#else
669 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
670#endif
671}
672
673
674/**
675 * Used by TB code to load unsigned 32-bit data w/ segmentation.
676 */
677IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
678{
679#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
680 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
681#else
682 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
683#endif
684}
685
686
687/**
688 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
689 * to 64 bits.
690 */
691IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
692{
693#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
694 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
695#else
696 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
697#endif
698}
699
700
701/**
702 * Used by TB code to load unsigned 64-bit data w/ segmentation.
703 */
704IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
705{
706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
707 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
708#else
709 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
710#endif
711}
712
713
714/**
715 * Used by TB code to load 128-bit data w/ segmentation.
716 */
717IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
721#else
722 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
729 */
730IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
731{
732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
733 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
734#else
735 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
736#endif
737}
738
739
740/**
741 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
742 */
743IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
744{
745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
746 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
747#else
748 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
749#endif
750}
751
752
753/**
754 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
755 */
756IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
757{
758#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
759 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
760#else
761 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
762#endif
763}
764
765
766/**
767 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
768 */
769IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
770{
771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
772 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
773#else
774 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
775#endif
776}
777
778
779/**
780 * Used by TB code to store unsigned 8-bit data w/ segmentation.
781 */
782IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
783{
784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
785 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
786#else
787 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
788#endif
789}
790
791
792/**
793 * Used by TB code to store unsigned 16-bit data w/ segmentation.
794 */
795IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
796{
797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
798 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
799#else
800 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
801#endif
802}
803
804
805/**
806 * Used by TB code to store unsigned 32-bit data w/ segmentation.
807 */
808IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
809{
810#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
811 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
812#else
813 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
814#endif
815}
816
817
818/**
819 * Used by TB code to store unsigned 64-bit data w/ segmentation.
820 */
821IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
822{
823#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
824 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
825#else
826 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
827#endif
828}
829
830
831/**
832 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
833 */
834IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
835{
836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
837 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
838#else
839 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
840#endif
841}
842
843
844/**
845 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
846 */
847IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
848{
849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
850 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
851#else
852 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
853#endif
854}
855
856
857/**
858 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checks.
859 */
860IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
861{
862#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
863 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
864#else
865 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
866#endif
867}
868
869
870/**
871 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
872 */
873IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
874{
875#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
876 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
877#else
878 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
879#endif
880}
881
882
883/**
884 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
885 */
886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
887{
888#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
889 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
890#else
891 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
892#endif
893}
894
895
896/**
897 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
898 */
899IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
900{
901#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
902 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
903#else
904 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
905#endif
906}
907
908
909/**
910 * Used by TB code to store a 32-bit selector value onto a generic stack.
911 *
912 * Intel CPUs don't write the whole dword when pushing a segment register, hence the special function.
913 */
914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
915{
916#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
917 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
918#else
919 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
920#endif
921}
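/*
 * Editorial sketch (not part of the recompiler): the special casing exists
 * because a 32-bit push of a segment register on Intel CPUs only writes the
 * low 16 bits of the dword stack slot, leaving the upper half untouched.
 * A minimal model of that store, using a hypothetical helper name:
 */
#if 0
static void sketchStoreSelectorSlot(uint8_t *pbSlot, uint32_t uSel)
{
    pbSlot[0] = (uint8_t)uSel;          /* low byte of the selector */
    pbSlot[1] = (uint8_t)(uSel >> 8);   /* high byte of the selector */
    /* bytes 2 and 3 of the dword slot are deliberately left untouched */
}
#endif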
922
923
924/**
925 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
926 */
927IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
928{
929#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
930 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
931#else
932 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
933#endif
934}
935
936
937/**
938 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
939 */
940IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
941{
942#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
943 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
944#else
945 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
946#endif
947}
948
949
950/**
951 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
952 */
953IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
954{
955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
956 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
957#else
958 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
959#endif
960}
961
962
963/**
964 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
965 */
966IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
967{
968#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
969 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
970#else
971 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
972#endif
973}
974
975
976
977/*********************************************************************************************************************************
978* Helpers: Flat memory fetches and stores. *
979*********************************************************************************************************************************/
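/*
 * Editorial note: the flat variants below reuse the segmented *SafeJmp workers
 * and pass UINT8_MAX as the segment register index to signal a flat
 * (unsegmented) address; the non-TLB-lookup configuration calls dedicated
 * iemMemFlat* workers instead.
 */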
980
981/**
982 * Used by TB code to load unsigned 8-bit data w/ flat address.
983 * @note Zero extending the value to 64-bit to simplify assembly.
984 */
985IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
986{
987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
988 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
989#else
990 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
991#endif
992}
993
994
995/**
996 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
997 * to 16 bits.
998 * @note Zero extending the value to 64-bit to simplify assembly.
999 */
1000IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1001{
1002#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1003 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1004#else
1005 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1006#endif
1007}
1008
1009
1010/**
1011 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1012 * to 32 bits.
1013 * @note Zero extending the value to 64-bit to simplify assembly.
1014 */
1015IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1016{
1017#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1018 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1019#else
1020 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1021#endif
1022}
1023
1024
1025/**
1026 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1027 * to 64 bits.
1028 */
1029IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1030{
1031#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1032 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1033#else
1034 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1035#endif
1036}
1037
1038
1039/**
1040 * Used by TB code to load unsigned 16-bit data w/ flat address.
1041 * @note Zero extending the value to 64-bit to simplify assembly.
1042 */
1043IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1044{
1045#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1046 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1047#else
1048 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1049#endif
1050}
1051
1052
1053/**
1054 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1055 * to 32 bits.
1056 * @note Zero extending the value to 64-bit to simplify assembly.
1057 */
1058IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1059{
1060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1061 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1062#else
1063 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1064#endif
1065}
1066
1067
1068/**
1069 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1070 * to 64 bits.
1071 * @note Sign extending all the way to 64 bits here, so no zero extension is involved.
1072 */
1073IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1074{
1075#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1076 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1077#else
1078 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1079#endif
1080}
1081
1082
1083/**
1084 * Used by TB code to load unsigned 32-bit data w/ flat address.
1085 * @note Zero extending the value to 64-bit to simplify assembly.
1086 */
1087IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1088{
1089#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1090 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1091#else
1092 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1093#endif
1094}
1095
1096
1097/**
1098 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1099 * to 64 bits.
1100 * @note Zero extending the value to 64-bit to simplify assembly.
1101 */
1102IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1103{
1104#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1105 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1106#else
1107 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1108#endif
1109}
1110
1111
1112/**
1113 * Used by TB code to load unsigned 64-bit data w/ flat address.
1114 */
1115IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1116{
1117#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1118 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1119#else
1120 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1121#endif
1122}
1123
1124
1125/**
1126 * Used by TB code to load unsigned 128-bit data w/ flat address.
1127 */
1128IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1129{
1130#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1131 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1132#else
1133 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1134#endif
1135}
1136
1137
1138/**
1139 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
1140 */
1141IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1142{
1143#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1144 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1145#else
1146 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1147#endif
1148}
1149
1150
1151/**
1152 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
1153 */
1154IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1155{
1156#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1157 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1158#else
1159 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1160#endif
1161}
1162
1163
1164/**
1165 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
1166 */
1167IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1168{
1169#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1170 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1171#else
1172 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1173#endif
1174}
1175
1176
1177/**
1178 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
1179 */
1180IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1181{
1182#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1183 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1184#else
1185 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1186#endif
1187}
1188
1189
1190/**
1191 * Used by TB code to store unsigned 8-bit data w/ flat address.
1192 */
1193IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1194{
1195#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1196 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1197#else
1198 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1199#endif
1200}
1201
1202
1203/**
1204 * Used by TB code to store unsigned 16-bit data w/ flat address.
1205 */
1206IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1207{
1208#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1209 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1210#else
1211 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1212#endif
1213}
1214
1215
1216/**
1217 * Used by TB code to store unsigned 32-bit data w/ flat address.
1218 */
1219IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1220{
1221#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1222 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1223#else
1224 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1225#endif
1226}
1227
1228
1229/**
1230 * Used by TB code to store unsigned 64-bit data w/ flat address.
1231 */
1232IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1233{
1234#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1235 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1236#else
1237 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1238#endif
1239}
1240
1241
1242/**
1243 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
1244 */
1245IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1246{
1247#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1248 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1249#else
1250 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1251#endif
1252}
1253
1254
1255/**
1256 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
1257 */
1258IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1259{
1260#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1261 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1262#else
1263 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1264#endif
1265}
1266
1267
1268/**
1269 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
1270 */
1271IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1272{
1273#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1274 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1275#else
1276 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1277#endif
1278}
1279
1280
1281/**
1282 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
1283 */
1284IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1285{
1286#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1287 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1288#else
1289 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1290#endif
1291}
1292
1293
1294/**
1295 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1296 */
1297IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1298{
1299#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1300 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1301#else
1302 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1303#endif
1304}
1305
1306
1307/**
1308 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1309 */
1310IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1311{
1312#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1313 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1314#else
1315 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1316#endif
1317}
1318
1319
1320/**
1321 * Used by TB code to store a segment selector value onto a flat stack.
1322 *
1323 * Intel CPUs don't write the whole dword when pushing a segment register, hence the special function.
1324 */
1325IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1326{
1327#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1328 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1329#else
1330 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1331#endif
1332}
1333
1334
1335/**
1336 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1337 */
1338IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1339{
1340#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1341 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1342#else
1343 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1344#endif
1345}
1346
1347
1348/**
1349 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1350 */
1351IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1352{
1353#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1354 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1355#else
1356 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1357#endif
1358}
1359
1360
1361/**
1362 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1363 */
1364IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1365{
1366#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1367 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1368#else
1369 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1370#endif
1371}
1372
1373
1374/**
1375 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1376 */
1377IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1378{
1379#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1380 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1381#else
1382 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1383#endif
1384}
1385
1386
1387
1388/*********************************************************************************************************************************
1389* Helpers: Segmented memory mapping. *
1390*********************************************************************************************************************************/
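/*
 * Editorial note: these mapping helpers return a host pointer to the guest
 * memory mapped for the requested access type and record the unmap bookkeeping
 * in *pbUnmapInfo; the TB code is presumably expected to hand that value to the
 * matching commit/unmap helper once the access is done.
 */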
1391
1392/**
1393 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1394 * segmentation.
1395 */
1396IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1397 RTGCPTR GCPtrMem, uint8_t iSegReg))
1398{
1399#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1400 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1401#else
1402 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1403#endif
1404}
1405
1406
1407/**
1408 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1409 */
1410IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1411 RTGCPTR GCPtrMem, uint8_t iSegReg))
1412{
1413#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1414 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1415#else
1416 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1417#endif
1418}
1419
1420
1421/**
1422 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1423 */
1424IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1425 RTGCPTR GCPtrMem, uint8_t iSegReg))
1426{
1427#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1428 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1429#else
1430 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1431#endif
1432}
1433
1434
1435/**
1436 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1437 */
1438IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1439 RTGCPTR GCPtrMem, uint8_t iSegReg))
1440{
1441#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1442 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1443#else
1444 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1445#endif
1446}
1447
1448
1449/**
1450 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1451 * segmentation.
1452 */
1453IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1454 RTGCPTR GCPtrMem, uint8_t iSegReg))
1455{
1456#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1457 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1458#else
1459 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1460#endif
1461}
1462
1463
1464/**
1465 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1466 */
1467IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1468 RTGCPTR GCPtrMem, uint8_t iSegReg))
1469{
1470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1471 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1472#else
1473 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1474#endif
1475}
1476
1477
1478/**
1479 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1480 */
1481IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1482 RTGCPTR GCPtrMem, uint8_t iSegReg))
1483{
1484#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1485 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1486#else
1487 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1488#endif
1489}
1490
1491
1492/**
1493 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1494 */
1495IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1496 RTGCPTR GCPtrMem, uint8_t iSegReg))
1497{
1498#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1499 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1500#else
1501 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1502#endif
1503}
1504
1505
1506/**
1507 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1508 * segmentation.
1509 */
1510IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1511 RTGCPTR GCPtrMem, uint8_t iSegReg))
1512{
1513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1514 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1515#else
1516 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1517#endif
1518}
1519
1520
1521/**
1522 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1523 */
1524IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1525 RTGCPTR GCPtrMem, uint8_t iSegReg))
1526{
1527#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1528 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1529#else
1530 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1531#endif
1532}
1533
1534
1535/**
1536 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1537 */
1538IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1539 RTGCPTR GCPtrMem, uint8_t iSegReg))
1540{
1541#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1542 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1543#else
1544 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1545#endif
1546}
1547
1548
1549/**
1550 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1551 */
1552IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1553 RTGCPTR GCPtrMem, uint8_t iSegReg))
1554{
1555#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1556 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1557#else
1558 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1559#endif
1560}
1561
1562
1563/**
1564 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1565 * segmentation.
1566 */
1567IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1568 RTGCPTR GCPtrMem, uint8_t iSegReg))
1569{
1570#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1571 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1572#else
1573 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1574#endif
1575}
1576
1577
1578/**
1579 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1580 */
1581IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1582 RTGCPTR GCPtrMem, uint8_t iSegReg))
1583{
1584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1585 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1586#else
1587 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1588#endif
1589}
1590
1591
1592/**
1593 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1594 */
1595IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1596 RTGCPTR GCPtrMem, uint8_t iSegReg))
1597{
1598#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1599 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1600#else
1601 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1602#endif
1603}
1604
1605
1606/**
1607 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1608 */
1609IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1610 RTGCPTR GCPtrMem, uint8_t iSegReg))
1611{
1612#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1613 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1614#else
1615 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1616#endif
1617}
1618
1619
1620/**
1621 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1622 */
1623IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1624 RTGCPTR GCPtrMem, uint8_t iSegReg))
1625{
1626#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1627 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1628#else
1629 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1630#endif
1631}
1632
1633
1634/**
1635 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1636 */
1637IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1638 RTGCPTR GCPtrMem, uint8_t iSegReg))
1639{
1640#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1641 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1642#else
1643 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1644#endif
1645}
1646
1647
1648/**
1649 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1650 * segmentation.
1651 */
1652IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1653 RTGCPTR GCPtrMem, uint8_t iSegReg))
1654{
1655#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1656 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1657#else
1658 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1659#endif
1660}
1661
1662
1663/**
1664 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1665 */
1666IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1667 RTGCPTR GCPtrMem, uint8_t iSegReg))
1668{
1669#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1670 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1671#else
1672 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1681 RTGCPTR GCPtrMem, uint8_t iSegReg))
1682{
1683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1684 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1685#else
1686 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1687#endif
1688}
1689
1690
1691/**
1692 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1693 */
1694IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1695 RTGCPTR GCPtrMem, uint8_t iSegReg))
1696{
1697#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1698 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1699#else
1700 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1701#endif
1702}
1703
1704
1705/*********************************************************************************************************************************
1706* Helpers: Flat memory mapping. *
1707*********************************************************************************************************************************/
1708
1709/**
1710 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1711 * address.
1712 */
1713IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1714{
1715#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1716 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1717#else
1718 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1719#endif
1720}
1721
1722
1723/**
1724 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1725 */
1726IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1727{
1728#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1729 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1730#else
1731 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1732#endif
1733}
1734
1735
1736/**
1737 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1738 */
1739IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1740{
1741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1742 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1743#else
1744 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1745#endif
1746}
1747
1748
1749/**
1750 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1751 */
1752IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1753{
1754#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1755 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1756#else
1757 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1758#endif
1759}
1760
1761
1762/**
1763 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1764 * address.
1765 */
1766IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1767{
1768#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1769 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1770#else
1771 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1772#endif
1773}
1774
1775
1776/**
1777 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1778 */
1779IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1780{
1781#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1782 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1783#else
1784 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1785#endif
1786}
1787
1788
1789/**
1790 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1791 */
1792IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1793{
1794#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1795 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1796#else
1797 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1798#endif
1799}
1800
1801
1802/**
1803 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1804 */
1805IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1806{
1807#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1808 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1809#else
1810 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1811#endif
1812}
1813
1814
1815/**
1816 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1817 * address.
1818 */
1819IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1820{
1821#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1822 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1823#else
1824 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1825#endif
1826}
1827
1828
1829/**
1830 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1831 */
1832IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1833{
1834#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1835 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1836#else
1837 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1838#endif
1839}
1840
1841
1842/**
1843 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1844 */
1845IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1846{
1847#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1848 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1849#else
1850 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1851#endif
1852}
1853
1854
1855/**
1856 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1857 */
1858IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1859{
1860#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1861 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1862#else
1863 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1864#endif
1865}
1866
1867
1868/**
1869 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1870 * address.
1871 */
1872IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1873{
1874#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1875 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1876#else
1877 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1878#endif
1879}
1880
1881
1882/**
1883 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1884 */
1885IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1886{
1887#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1888 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1889#else
1890 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1891#endif
1892}
1893
1894
1895/**
1896 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1897 */
1898IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1899{
1900#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1901 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1902#else
1903 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1904#endif
1905}
1906
1907
1908/**
1909 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1910 */
1911IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1912{
1913#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1914 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1915#else
1916 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1917#endif
1918}
1919
1920
1921/**
1922 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1923 */
1924IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1925{
1926#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1927 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1928#else
1929 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1930#endif
1931}
1932
1933
1934/**
1935 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1936 */
1937IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1938{
1939#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1940 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1941#else
1942 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1943#endif
1944}
1945
1946
1947/**
1948 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1949 * address.
1950 */
1951IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1952{
1953#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1954 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1955#else
1956 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1957#endif
1958}
1959
1960
1961/**
1962 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1963 */
1964IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1965{
1966#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1967 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1968#else
1969 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1970#endif
1971}
1972
1973
1974/**
1975 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1976 */
1977IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1978{
1979#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1980 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1981#else
1982 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1983#endif
1984}
1985
1986
1987/**
1988 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1993 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1994#else
1995 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1996#endif
1997}
1998
1999
2000/*********************************************************************************************************************************
2001* Helpers: Commit, rollback & unmap *
2002*********************************************************************************************************************************/
2003
2004/**
2005 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2006 */
2007IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2008{
2009 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2010}
2011
2012
2013/**
2014 * Used by TB code to commit and unmap a read-write memory mapping.
2015 */
2016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2017{
2018 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2019}
2020
2021
2022/**
2023 * Used by TB code to commit and unmap a write-only memory mapping.
2024 */
2025IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2026{
2027 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2028}
2029
2030
2031/**
2032 * Used by TB code to commit and unmap a read-only memory mapping.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2035{
2036 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2037}
2038
2039
2040/**
2041 * Reinitializes the native recompiler state.
2042 *
2043 * Called before starting a new recompile job.
2044 */
2045static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2046{
2047 pReNative->cLabels = 0;
2048 pReNative->bmLabelTypes = 0;
2049 pReNative->cFixups = 0;
2050 pReNative->cTbExitFixups = 0;
2051#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2052 pReNative->pDbgInfo->cEntries = 0;
2053 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2054#endif
2055 pReNative->pTbOrg = pTb;
2056 pReNative->cCondDepth = 0;
2057 pReNative->uCondSeqNo = 0;
2058 pReNative->uCheckIrqSeqNo = 0;
2059 pReNative->uTlbSeqNo = 0;
2060#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2061 pReNative->fSkippingEFlags = 0;
2062#endif
2063#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2064 pReNative->PostponedEfl.fEFlags = 0;
2065 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2066 pReNative->PostponedEfl.cOpBits = 0;
2067 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2068 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2069#endif
2070
2071#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2072 pReNative->Core.offPc = 0;
2073# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2074 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2075# endif
2076# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2077 pReNative->Core.fDebugPcInitialized = false;
2078# endif
2079#endif
2080 pReNative->fSimdRaiseXcptChecksEmitted = 0;
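    /* Mark the fixed host registers as allocated up front; when fewer than 32 host
       GPRs exist, the non-existing register indexes are marked as allocated too. */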
2081 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2082#if IEMNATIVE_HST_GREG_COUNT < 32
2083 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2084#endif
2085 ;
2086 pReNative->Core.bmHstRegsWithGstShadow = 0;
2087 pReNative->Core.bmGstRegShadows = 0;
2088#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2089 pReNative->Core.bmGstRegShadowDirty = 0;
2090#endif
2091 pReNative->Core.bmVars = 0;
2092 pReNative->Core.bmStack = 0;
2093 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2094 pReNative->Core.u64ArgVars = UINT64_MAX;
2095
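    /* All unique (single instance) labels start out undefined. */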
2096 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2097 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2120
2121 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2122
2123 /* Full host register reinit: */
2124 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2125 {
2126 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2127 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2128 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2129 }
2130
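    /* Mark the remaining fixed registers as reserved; the ones with dedicated roles
       (pVCpu, pCtx, temporaries, debug PC) get their specific assignments below. */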
2131 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2132 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2133#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2134 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2135#endif
2136#ifdef IEMNATIVE_REG_FIXED_TMP0
2137 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2138#endif
2139#ifdef IEMNATIVE_REG_FIXED_TMP1
2140 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2141#endif
2142#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2143 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2144#endif
2145 );
2146 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2147 {
2148 fRegs &= ~RT_BIT_32(idxReg);
2149 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2150 }
2151
2152 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2153#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2154 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2155#endif
2156#ifdef IEMNATIVE_REG_FIXED_TMP0
2157 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2158#endif
2159#ifdef IEMNATIVE_REG_FIXED_TMP1
2160 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2161#endif
2162#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2163 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2164#endif
2165
2166 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2167#if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2168 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2169#endif
2170 ;
2171 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2172 pReNative->Core.bmGstSimdRegShadows = 0;
2173 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2174 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2175
2176 /* Full host register reinit: */
2177 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2178 {
2179 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2180 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2181 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2182 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2183 }
2184
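    /* Mark the fixed SIMD registers as reserved, then assign the dedicated temporary (if any) below. */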
2185 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2186 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2187 {
2188 fRegs &= ~RT_BIT_32(idxReg);
2189 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2190 }
2191
2192#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2193 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2194#endif
2195
2196 return pReNative;
2197}
2198
2199
2200/**
2201 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2202 */
2203static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2204{
2205 RTMemFree(pReNative->pInstrBuf);
2206 RTMemFree(pReNative->paLabels);
2207 RTMemFree(pReNative->paFixups);
2208 RTMemFree(pReNative->paTbExitFixups);
2209#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2210 RTMemFree(pReNative->pDbgInfo);
2211#endif
2212 RTMemFree(pReNative);
2213}
2214
2215
2216/**
2217 * Allocates and initializes the native recompiler state.
2218 *
2219 * This is called the first time an EMT wants to recompile something.
2220 *
2221 * @returns Pointer to the new recompiler state.
2222 * @param pVCpu The cross context virtual CPU structure of the calling
2223 * thread.
2224 * @param pTb The TB that's about to be recompiled. When this is NULL,
2225 * the recompiler state is for emitting the common per-chunk
2226 * code from iemNativeRecompileAttachExecMemChunkCtx.
2227 * @thread EMT(pVCpu)
2228 */
2229static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2230{
2231 VMCPU_ASSERT_EMT(pVCpu);
2232
2233 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2234 AssertReturn(pReNative, NULL);
2235
2236 /*
2237 * Try allocate all the buffers and stuff we need.
2238 */
2239 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2240 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2241 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2242 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2243 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2244#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2245 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2246#endif
2247 if (RT_LIKELY( pReNative->pInstrBuf
2248 && pReNative->paLabels
2249 && pReNative->paFixups
2250 && pReNative->paTbExitFixups)
2251#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2252 && pReNative->pDbgInfo
2253#endif
2254 )
2255 {
2256 /*
2257 * Set the buffer & array sizes on success.
2258 */
2259 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2260 pReNative->cLabelsAlloc = _8K / cFactor;
2261 pReNative->cFixupsAlloc = _16K / cFactor;
2262 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2263#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2264 pReNative->cDbgInfoAlloc = _16K / cFactor;
2265#endif
2266
2267 /* Other constant stuff: */
2268 pReNative->pVCpu = pVCpu;
2269
2270 /*
2271 * Done, just reinit it.
2272 */
2273 return iemNativeReInit(pReNative, pTb);
2274 }
2275
2276 /*
2277 * Failed. Cleanup and return.
2278 */
2279 AssertFailed();
2280 iemNativeTerm(pReNative);
2281 return NULL;
2282}
2283
2284
2285/**
2286 * Creates a label
2287 *
2288 * If the label does not yet have a defined position,
2289 * call iemNativeLabelDefine() later to set it.
2290 *
2291 * @returns Label ID. Throws VBox status code on failure, so no need to check
2292 * the return value.
2293 * @param pReNative The native recompile state.
2294 * @param enmType The label type.
2295 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2296 * label is not yet defined (default).
2297 * @param uData Data associated with the label. Only applicable to
2298 * certain types of labels. Default is zero.
2299 */
2300DECL_HIDDEN_THROW(uint32_t)
2301iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2302 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2303{
2304 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2305#if defined(RT_ARCH_AMD64)
2306 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2307#endif
2308
2309 /*
2310 * Locate existing label definition.
2311 *
2312 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2313 * and uData is zero.
2314 */
2315 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2316 uint32_t const cLabels = pReNative->cLabels;
2317 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2318#ifndef VBOX_STRICT
2319 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2320 && offWhere == UINT32_MAX
2321 && uData == 0
2322#endif
2323 )
2324 {
2325#ifndef VBOX_STRICT
2326 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2327 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2328 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2329 if (idxLabel < pReNative->cLabels)
2330 return idxLabel;
2331#else
2332 for (uint32_t i = 0; i < cLabels; i++)
2333 if ( paLabels[i].enmType == enmType
2334 && paLabels[i].uData == uData)
2335 {
2336 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2337 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2338 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2339 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2340 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2341 return i;
2342 }
2343 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2344 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2345#endif
2346 }
2347
2348 /*
2349 * Make sure we've got room for another label.
2350 */
2351 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2352 { /* likely */ }
2353 else
2354 {
2355 uint32_t cNew = pReNative->cLabelsAlloc;
2356 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2357 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2358 cNew *= 2;
2359 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2360 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2361 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2362 pReNative->paLabels = paLabels;
2363 pReNative->cLabelsAlloc = cNew;
2364 }
2365
2366 /*
2367 * Define a new label.
2368 */
2369 paLabels[cLabels].off = offWhere;
2370 paLabels[cLabels].enmType = enmType;
2371 paLabels[cLabels].uData = uData;
2372 pReNative->cLabels = cLabels + 1;
2373
2374 Assert((unsigned)enmType < 64);
2375 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2376
2377 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2378 {
2379 Assert(uData == 0);
2380 pReNative->aidxUniqueLabels[enmType] = cLabels;
2381 }
2382
2383 if (offWhere != UINT32_MAX)
2384 {
2385#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2386 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2387 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2388#endif
2389 }
2390 return cLabels;
2391}
2392
2393
2394/**
2395 * Defines the location of an existing label.
2396 *
2397 * @param pReNative The native recompile state.
2398 * @param idxLabel The label to define.
2399 * @param offWhere The position.
2400 */
2401DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2402{
2403 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2404 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2405 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2406 pLabel->off = offWhere;
2407#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2408 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2409 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2410#endif
2411}
2412
2413
2414/**
2415 * Looks up a label.
2416 *
2417 * @returns Label ID if found, UINT32_MAX if not.
2418 */
2419DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2420 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2421{
2422 Assert((unsigned)enmType < 64);
2423 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2424 {
2425 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2426 return pReNative->aidxUniqueLabels[enmType];
2427
2428 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2429 uint32_t const cLabels = pReNative->cLabels;
2430 for (uint32_t i = 0; i < cLabels; i++)
2431 if ( paLabels[i].enmType == enmType
2432 && paLabels[i].uData == uData
2433 && ( paLabels[i].off == offWhere
2434 || offWhere == UINT32_MAX
2435 || paLabels[i].off == UINT32_MAX))
2436 return i;
2437 }
2438 return UINT32_MAX;
2439}
2440
2441
2442/**
2443 * Adds a fixup.
2444 *
2445 * @throws VBox status code (int) on failure.
2446 * @param pReNative The native recompile state.
2447 * @param offWhere The instruction offset of the fixup location.
2448 * @param idxLabel The target label ID for the fixup.
2449 * @param enmType The fixup type.
2450 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2451 */
2452DECL_HIDDEN_THROW(void)
2453iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2454 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2455{
2456 Assert(idxLabel <= UINT16_MAX);
2457 Assert((unsigned)enmType <= UINT8_MAX);
2458#ifdef RT_ARCH_ARM64
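    /* kIemNativeFixupType_RelImm14At5 is the TBZ/TBNZ style branch whose 14-bit immediate
       only reaches +/-32KB, hence (presumably) the restriction on which labels it may target
       (see VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL). */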
2459 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2460 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2461 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2462#endif
2463
2464 /*
2465 * Make sure we've room.
2466 */
2467 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2468 uint32_t const cFixups = pReNative->cFixups;
2469 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2470 { /* likely */ }
2471 else
2472 {
2473 uint32_t cNew = pReNative->cFixupsAlloc;
2474 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2475 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2476 cNew *= 2;
2477 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2478 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2479 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2480 pReNative->paFixups = paFixups;
2481 pReNative->cFixupsAlloc = cNew;
2482 }
2483
2484 /*
2485 * Add the fixup.
2486 */
2487 paFixups[cFixups].off = offWhere;
2488 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2489 paFixups[cFixups].enmType = enmType;
2490 paFixups[cFixups].offAddend = offAddend;
2491 pReNative->cFixups = cFixups + 1;
2492}
2493
2494
2495/**
2496 * Adds a fixup to the per-chunk tail code.
2497 *
2498 * @throws VBox status code (int) on failure.
2499 * @param pReNative The native recompile state.
2500 * @param offWhere The instruction offset of the fixup location.
2501 * @param enmExitReason The exit reason to jump to.
2502 */
2503DECL_HIDDEN_THROW(void)
2504iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2505{
2506 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2507
2508 /*
2509 * Make sure we've room.
2510 */
2511 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2512 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2513 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2514 { /* likely */ }
2515 else
2516 {
2517 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2518 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2519 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2520 cNew *= 2;
2521 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2522 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2523 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2524 pReNative->paTbExitFixups = paTbExitFixups;
2525 pReNative->cTbExitFixupsAlloc = cNew;
2526 }
2527
2528 /*
2529 * Add the fixup.
2530 */
2531 paTbExitFixups[cTbExitFixups].off = offWhere;
2532 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2533 pReNative->cTbExitFixups = cTbExitFixups + 1;
2534}
2535
2536
2537/**
2538 * Slow code path for iemNativeInstrBufEnsure.
2539 */
2540DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2541{
2542 /* Double the buffer size till we meet the request. */
2543 uint32_t cNew = pReNative->cInstrBufAlloc;
2544 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2545 do
2546 cNew *= 2;
2547 while (cNew < off + cInstrReq);
2548
2549 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2550#ifdef RT_ARCH_ARM64
2551 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2552#else
2553 uint32_t const cbMaxInstrBuf = _2M;
2554#endif
2555 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2556
2557 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2558 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2559
2560#ifdef VBOX_STRICT
2561 pReNative->offInstrBufChecked = off + cInstrReq;
2562#endif
2563 pReNative->cInstrBufAlloc = cNew;
2564 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2565}
2566
2567#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2568
2569/**
2570 * Grows the static debug info array used during recompilation.
2571 *
2572 * @returns Pointer to the new debug info block; throws VBox status code on
2573 * failure, so no need to check the return value.
2574 */
2575DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2576{
2577 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2578 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2579 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2580 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2581 pReNative->pDbgInfo = pDbgInfo;
2582 pReNative->cDbgInfoAlloc = cNew;
2583 return pDbgInfo;
2584}
2585
2586
2587/**
2588 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2589 */
2590DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2591{
2592 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2593 { /* likely */ }
2594 else
2595 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2596 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2597}
2598
2599
2600/**
2601 * Debug Info: Adds a native offset record, if necessary.
2602 */
2603DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2604{
2605 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2606
2607 /*
2608 * Do we need this one?
2609 */
2610 uint32_t const offPrev = pDbgInfo->offNativeLast;
2611 if (offPrev == off)
2612 return;
2613 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2614
2615 /*
2616 * Add it.
2617 */
2618 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2619 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2620 pEntry->NativeOffset.offNative = off;
2621 pDbgInfo->offNativeLast = off;
2622}
2623
2624
2625/**
2626 * Debug Info: Record info about a label.
2627 */
2628static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2629{
2630 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2631 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2632 pEntry->Label.uUnused = 0;
2633 pEntry->Label.enmLabel = (uint8_t)enmType;
2634 pEntry->Label.uData = uData;
2635}
2636
2637
2638/**
2639 * Debug Info: Record info about a threaded call.
2640 */
2641static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2642{
2643 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2644 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2645 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2646 pEntry->ThreadedCall.uUnused = 0;
2647 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2648}
2649
2650
2651/**
2652 * Debug Info: Record info about a new guest instruction.
2653 */
2654static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2655{
2656 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2657 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2658 pEntry->GuestInstruction.uUnused = 0;
2659 pEntry->GuestInstruction.fExec = fExec;
2660}
2661
2662
2663/**
2664 * Debug Info: Record info about guest register shadowing.
2665 */
2666DECL_HIDDEN_THROW(void)
2667iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2668 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2669{
2670 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2671 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2672 pEntry->GuestRegShadowing.uUnused = 0;
2673 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2674 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2675 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2676# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2677 Assert( idxHstReg != UINT8_MAX
2678 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2679# endif
2680}
2681
2682
2683/**
2684 * Debug Info: Record info about guest SIMD register shadowing.
2685 */
2686DECL_HIDDEN_THROW(void)
2687iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2688 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2689{
2690 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2691 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2692 pEntry->GuestSimdRegShadowing.uUnused = 0;
2693 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2694 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2695 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2696}
2697
2698
2699# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2700/**
2701 * Debug Info: Record info about delayed RIP updates.
2702 */
2703DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2704{
2705 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2706 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2707 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2708 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2709}
2710# endif
2711
2712
2713/**
2714 * Debug Info: Record info about a dirty guest register.
2715 */
2716DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2717 uint8_t idxGstReg, uint8_t idxHstReg)
2718{
2719 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2720 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2721 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2722 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2723 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2724}
2725
2726
2727/**
2728 * Debug Info: Record info about a dirty guest register writeback operation.
2729 */
2730DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2731{
2732 unsigned const cBitsGstRegMask = 25;
2733 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2734
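    /* The 64-bit dirty mask is recorded in up to three debug entries holding 25+25+14 bits;
       cShift tells the consumer which 25-bit block the entry's fGstReg holds (block 0, 1 or 2). */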
2735 /* The first block of 25 bits: */
2736 if (fGstReg & fGstRegMask)
2737 {
2738 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2739 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2740 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2741 pEntry->GuestRegWriteback.cShift = 0;
2742 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2743 fGstReg &= ~(uint64_t)fGstRegMask;
2744 if (!fGstReg)
2745 return;
2746 }
2747
2748 /* The second block of 25 bits: */
2749 fGstReg >>= cBitsGstRegMask;
2750 if (fGstReg & fGstRegMask)
2751 {
2752 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2753 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2754 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2755 pEntry->GuestRegWriteback.cShift = 1;
2756 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2757 fGstReg &= ~(uint64_t)fGstRegMask;
2758 if (!fGstReg)
2759 return;
2760 }
2761
2762 /* The last block with 14 bits: */
2763 fGstReg >>= cBitsGstRegMask;
2764 Assert(fGstReg & fGstRegMask);
2765 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2766 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2767 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2768 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2769 pEntry->GuestRegWriteback.cShift = 2;
2770 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2771}
2772
2773
2774# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2775/**
2776 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2777 */
2778DECL_HIDDEN_THROW(void)
2779iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2780 uint8_t cOpBits, uint8_t idxEmit)
2781{
2782 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2783 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2784 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2785 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2786 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2787 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2788 pEntry->PostponedEflCalc.uUnused = 0;
2789}
2790# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2791
2792#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2793
2794
2795/*********************************************************************************************************************************
2796* Register Allocator *
2797*********************************************************************************************************************************/
2798
2799/**
2800 * Register parameter indexes (indexed by argument number).
2801 */
2802DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2803{
2804 IEMNATIVE_CALL_ARG0_GREG,
2805 IEMNATIVE_CALL_ARG1_GREG,
2806 IEMNATIVE_CALL_ARG2_GREG,
2807 IEMNATIVE_CALL_ARG3_GREG,
2808#if defined(IEMNATIVE_CALL_ARG4_GREG)
2809 IEMNATIVE_CALL_ARG4_GREG,
2810# if defined(IEMNATIVE_CALL_ARG5_GREG)
2811 IEMNATIVE_CALL_ARG5_GREG,
2812# if defined(IEMNATIVE_CALL_ARG6_GREG)
2813 IEMNATIVE_CALL_ARG6_GREG,
2814# if defined(IEMNATIVE_CALL_ARG7_GREG)
2815 IEMNATIVE_CALL_ARG7_GREG,
2816# endif
2817# endif
2818# endif
2819#endif
2820};
2821AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2822
2823/**
2824 * Call register masks indexed by argument count.
2825 */
2826DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2827{
2828 0,
2829 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2830 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2831 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2832 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2833 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2834#if defined(IEMNATIVE_CALL_ARG4_GREG)
2835 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2836 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2837# if defined(IEMNATIVE_CALL_ARG5_GREG)
2838 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2839 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2840# if defined(IEMNATIVE_CALL_ARG6_GREG)
2841 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2842 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2843 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2844# if defined(IEMNATIVE_CALL_ARG7_GREG)
2845 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2846 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2847 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2848# endif
2849# endif
2850# endif
2851#endif
2852};
2853
2854#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2855/**
2856 * BP offset of the stack argument slots.
2857 *
2858 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2859 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2860 */
2861DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2862{
2863 IEMNATIVE_FP_OFF_STACK_ARG0,
2864# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2865 IEMNATIVE_FP_OFF_STACK_ARG1,
2866# endif
2867# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2868 IEMNATIVE_FP_OFF_STACK_ARG2,
2869# endif
2870# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2871 IEMNATIVE_FP_OFF_STACK_ARG3,
2872# endif
2873};
2874AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2875#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2876
2877/**
2878 * Info about shadowed guest register values.
2879 * @see IEMNATIVEGSTREG
2880 */
2881DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2882{
2883#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2884 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2899 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2900 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2901 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2902 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2903 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2904 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2906 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2907 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2908 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2909 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2910 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2912 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2913 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2914 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2915 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2916 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2918 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2919 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2920 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2921 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2922 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2923 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2924 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2925 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2926 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2927 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2928 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2929 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2930 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
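    /* Note: The EFlags.xx pseudo registers below have no CPUMCTX backing of their own
             (UINT32_MAX offset, zero size). */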
2931 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2932 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2933 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2934 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2935 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2936 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2937 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2938#undef CPUMCTX_OFF_AND_SIZE
2939};
2940AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2941
2942
2943/** Host CPU general-purpose register names. */
2944DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2945{
2946#ifdef RT_ARCH_AMD64
2947 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2948#elif defined(RT_ARCH_ARM64)
2949 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2950 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2951#else
2952# error "port me"
2953#endif
2954};
2955
2956
2957#if 0 /* unused */
2958/**
2959 * Tries to locate a suitable register in the given register mask.
2960 *
2961 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2962 * failed.
2963 *
2964 * @returns Host register number on success, returns UINT8_MAX on failure.
2965 */
2966static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2967{
2968 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2969 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2970 if (fRegs)
2971 {
2972 /** @todo pick better here: */
2973 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2974
2975 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2976 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2977 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2978 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2979
2980 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2981 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2982 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2983 return idxReg;
2984 }
2985 return UINT8_MAX;
2986}
2987#endif /* unused */
2988
2989#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2990
2991/**
2992 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2993 *
2994 * @returns New code buffer offset; throws VBox status code on failure.
2995 * @param pReNative The native recompile state.
2996 * @param off The current code buffer position.
2997 * @param enmGstReg The guest register to store to.
2998 * @param idxHstReg The host register to store from.
2999 */
3000DECL_FORCE_INLINE_THROW(uint32_t)
3001iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3002{
3003 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3004 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3005
3006 switch (g_aGstShadowInfo[enmGstReg].cb)
3007 {
3008 case sizeof(uint64_t):
3009 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3010 case sizeof(uint32_t):
3011 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3012 case sizeof(uint16_t):
3013 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3014# if 0 /* not present in the table. */
3015 case sizeof(uint8_t):
3016 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3017# endif
3018 default:
3019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3020 }
3021}
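/*
 * Illustrative sketch (not from the original source): the store width comes from
 * g_aGstShadowInfo[enmGstReg].cb, so flushing the RIP shadow takes the 64-bit path
 * while flushing MXCSR takes the 32-bit one; idxHstReg is assumed to hold the shadow.
 *
 * @code
 *     off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, kIemNativeGstReg_Pc,    idxHstReg);
 *     off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, kIemNativeGstReg_MxCsr, idxHstReg);
 * @endcode
 */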
3022
3023
3024/**
3025 * Emits code to flush a pending write of the given guest register,
3026 * version with alternative core state.
3027 *
3028 * @returns New code buffer offset.
3029 * @param pReNative The native recompile state.
3030 * @param off Current code buffer position.
3031 * @param pCore Alternative core state.
3032 * @param enmGstReg The guest register to flush.
3033 */
3034DECL_HIDDEN_THROW(uint32_t)
3035iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3036{
3037 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3038
3039 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3040 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3041 || enmGstReg == kIemNativeGstReg_MxCsr);
3042 Assert( idxHstReg != UINT8_MAX
3043 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3044 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3045 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3046
3047 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3048
3049 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3050 return off;
3051}
3052
3053
3054/**
3055 * Emits code to flush a pending write of the given guest register.
3056 *
3057 * @returns New code buffer offset.
3058 * @param pReNative The native recompile state.
3059 * @param off Current code buffer position.
3060 * @param enmGstReg The guest register to flush.
3061 */
3062DECL_HIDDEN_THROW(uint32_t)
3063iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3064{
3065 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3066
3067 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3068 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3069 || enmGstReg == kIemNativeGstReg_MxCsr);
3070 Assert( idxHstReg != UINT8_MAX
3071 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3072 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3073 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3074
3075 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3076
3077 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3078 return off;
3079}
3080
3081
3082/**
3083 * Flush the given set of guest registers if marked as dirty.
3084 *
3085 * @returns New code buffer offset.
3086 * @param pReNative The native recompile state.
3087 * @param off Current code buffer position.
3088 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3089 * @note Must not modify the host status flags!
3090 */
3091DECL_HIDDEN_THROW(uint32_t)
3092iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3093{
3094 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3095 if (bmGstRegShadowDirty)
3096 {
3097# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3098 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3099 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3100# endif
3101 do
3102 {
3103 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3104 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3105 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3106 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3107 } while (bmGstRegShadowDirty);
3108 }
3109
3110 return off;
3111}
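/*
 * Illustrative sketch (not from the original source): a call site typically flushes all
 * dirty guest shadows before emitting code that reads CPUMCTX directly, or passes a mask
 * to flush only a subset (the single-register mask below is just an example).
 *
 * @code
 *     off = iemNativeRegFlushDirtyGuest(pReNative, off);                                        // everything dirty
 *     off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_GprFirst));  // just the first GPR
 * @endcode
 */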
3112
3113
3114/**
3115 * Flush all shadowed guest registers marked as dirty for the given host register.
3116 *
3117 * @returns New code buffer offset.
3118 * @param pReNative The native recompile state.
3119 * @param off Current code buffer position.
3120 * @param idxHstReg The host register.
3121 *
3122 * @note This doesn't do any unshadowing of guest registers from the host register.
3123 *
3124 * @note Must not modify the host status flags!
3125 */
3126DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3127{
3128 /* We need to flush any pending guest register writes this host register shadows. */
3129 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3130 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3131 {
3132# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3133 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3134 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3135# endif
3136 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3137 do
3138 {
3139 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3140 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3141 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3142 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3143 } while (bmGstRegShadowDirty);
3144 }
3145
3146 return off;
3147}
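/*
 * Illustrative sketch (not from the original source): before repurposing or clobbering a
 * host register, any dirty guest shadows it carries are written back first; idxHstReg is
 * assumed to be a valid host register index.
 *
 * @code
 *     off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
 *     // idxHstReg can now be reused without losing pending guest register writes.
 * @endcode
 */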
3148
3149#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3150
3151
3152/**
3153 * Locate a register, possibly freeing one up.
3154 *
3155 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3156 * failed.
3157 *
3158 * @returns Host register number on success. Returns UINT8_MAX if no registers
3159 * found, the caller is supposed to deal with this and raise an
3160 * allocation type specific status code (if desired).
3161 *
3162 * @throws VBox status code if we run into trouble spilling a variable or
3163 * recording debug info. Does NOT throw anything if we're out of
3164 * registers, though.
3165 *
3166 * @note Must not modify the host status flags!
3167 */
3168static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3169 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3170{
3171 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3172 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3173 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3174
3175 /*
3176 * Try a freed register that's shadowing a guest register.
3177 */
3178 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3179 if (fRegs)
3180 {
3181 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3182
3183#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3184 /*
3185 * When we have liveness information, we use it to kick out all shadowed
3186 * guest registers that will not be needed any more in this TB. If we're
3187 * lucky, this may prevent us from ending up here again.
3188 *
3189 * Note! We must consider the previous entry here so we don't free
3190 * anything that the current threaded function requires (current
3191 * entry is produced by the next threaded function).
3192 */
3193 uint32_t const idxCurCall = pReNative->idxCurCall;
3194 if (idxCurCall > 0)
3195 {
3196 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3197 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3198
3199 /* Merge EFLAGS. */
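            /* The three folds below AND all seven per-flag "can be freed" bits into the
               lowest bit of the group (kIemNativeGstReg_EFlags), so the combined EFLAGS
               shadow is only considered freeable when every individual flag is. */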
3200 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3201 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3202 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3203 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3204 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3205
3206 /* If it matches any shadowed registers. */
3207 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3208 {
3209#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3210 /* Writeback any dirty shadow registers we are about to unshadow. */
3211 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3212#endif
3213
3214 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3215 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3216 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3217
3218 /* See if we've got any unshadowed registers we can return now. */
3219 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3220 if (fUnshadowedRegs)
3221 {
3222 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3223 return (fPreferVolatile
3224 ? ASMBitFirstSetU32(fUnshadowedRegs)
3225 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3226 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3227 - 1;
3228 }
3229 }
3230 }
3231#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3232
3233 unsigned const idxReg = (fPreferVolatile
3234 ? ASMBitFirstSetU32(fRegs)
3235 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3236 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3237 - 1;
3238
3239 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3240 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3241 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3242 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3243
3244#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3245 /* We need to flush any pending guest register writes this host register shadows. */
3246 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3247#endif
3248
3249 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3250 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3251 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3252 return idxReg;
3253 }
3254
3255 /*
3256 * Try to free up a variable that's in a register.
3257 *
3258 * We do two rounds here, first evacuating variables that don't need to be
3259 * saved on the stack, then in the second round moving things to the stack.
3260 */
3261 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3262 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3263 {
3264 uint32_t fVars = pReNative->Core.bmVars;
3265 while (fVars)
3266 {
3267 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3268 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the GPR allocator) */
3269 {
3270 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3271 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3272 && (RT_BIT_32(idxReg) & fRegMask)
3273 && ( iLoop == 0
3274 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3275 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3276 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3277 {
3278 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3279 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3280 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3281 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3282 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3283 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3284#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3285 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3286#endif
3287
3288 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3289 {
3290 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3291 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3292 }
3293
3294 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3295 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3296
3297 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3298 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3299 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3300 return idxReg;
3301 }
3302 }
3303 fVars &= ~RT_BIT_32(idxVar);
3304 }
3305 }
3306
3307 return UINT8_MAX;
3308}
3309
3310
3311/**
3312 * Reassigns a variable to a different register specified by the caller.
3313 *
3314 * @returns The new code buffer position.
3315 * @param pReNative The native recompile state.
3316 * @param off The current code buffer position.
3317 * @param idxVar The variable index.
3318 * @param idxRegOld The old host register number.
3319 * @param idxRegNew The new host register number.
3320 * @param pszCaller The caller for logging.
3321 */
3322static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3323 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3324{
3325 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3326 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3327 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3328 RT_NOREF(pszCaller);
3329
3330#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3331 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3332#endif
3333 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3334
3335 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3336#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3337 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3338#endif
3339 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3340 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3341 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3342
3343 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3344 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3345 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3346 if (fGstRegShadows)
3347 {
3348 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3349 | RT_BIT_32(idxRegNew);
3350 while (fGstRegShadows)
3351 {
3352 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3353 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3354
3355 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3356 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3357 }
3358 }
3359
3360 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3361 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3362 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3363 return off;
3364}
3365
3366
3367/**
3368 * Moves a variable to a different register or spills it onto the stack.
3369 *
3370 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3371 * kinds can easily be recreated if needed later.
3372 *
3373 * @returns The new code buffer position.
3374 * @param pReNative The native recompile state.
3375 * @param off The current code buffer position.
3376 * @param idxVar The variable index.
3377 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3378 * call-volatile registers.
3379 */
3380DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3381 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3382{
3383 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3384 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3385 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3386 Assert(!pVar->fRegAcquired);
3387
3388 uint8_t const idxRegOld = pVar->idxReg;
3389 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3390 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3391 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3392 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3393 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3394 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3395 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3396 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3397#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3398 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3399#endif
3400
3401
3402 /** @todo Add statistics on this.*/
3403 /** @todo Implement basic variable liveness analysis (python) so variables
3404 * can be freed immediately once no longer used. Without it we risk
3405 * trashing registers and stack for dead variables.
3406 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3407
3408 /*
3409 * First try to move it to a different register, as that's cheaper.
3410 */
3411 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3412 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3413 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3414 if (fRegs)
3415 {
3416 /* Avoid using shadow registers, if possible. */
3417 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3418 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3419 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3420 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3421 }
3422
3423 /*
3424 * Otherwise we must spill the register onto the stack.
3425 */
3426 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3427 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3428 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3429 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3430
3431 pVar->idxReg = UINT8_MAX;
3432 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3433 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3434 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3435 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3436 return off;
3437}
3438
3439
3440/**
3441 * Allocates a temporary host general purpose register.
3442 *
3443 * This may emit code to save register content onto the stack in order to free
3444 * up a register.
3445 *
3446 * @returns The host register number; throws VBox status code on failure,
3447 * so no need to check the return value.
3448 * @param pReNative The native recompile state.
3449 * @param poff Pointer to the variable with the code buffer
3450 * position. This will be updated if we need to move
3451 * a variable from register to stack in order to
3452 * satisfy the request.
3453 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3454 * registers (@c true, default) or the other way
3455 * around (@c false, for
3456 * iemNativeRegAllocTmpForGuestReg()).
3457 *
3458 * @note Must not modify the host status flags!
3459 */
3460template<bool const a_fPreferVolatile>
3461DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3462{
3463 /*
3464 * Try to find a completely unused register, preferably a call-volatile one.
3465 */
3466 uint8_t idxReg;
3467 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3468 & ~pReNative->Core.bmHstRegsWithGstShadow
3469 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3470 if (fRegs)
3471 {
3472 if (a_fPreferVolatile)
3473 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3474 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3475 else
3476 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3477 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3478 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3479 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3480 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3481 }
3482 else
3483 {
3484 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile);
3485 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3486 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3487 }
3488 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3489}
3490
3491
3492/** See iemNativeRegAllocTmpInt for details. */
3493DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3494{
3495 return iemNativeRegAllocTmpInt<true>(pReNative, poff);
3496}
3497
3498
3499/** See iemNativeRegAllocTmpInt for details. */
3500DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3501{
3502 return iemNativeRegAllocTmpInt<false>(pReNative, poff);
3503}
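/*
 * Illustrative usage sketch (not from the original source): temporary registers are paired
 * with iemNativeRegFreeTmp(); the immediate load in the middle is merely an example use.
 *
 * @code
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42));
 *     // ... emit code using idxTmpReg ...
 *     iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */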
3504
3505
3506/**
3507 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3508 * registers.
3509 *
3510 * @returns The host register number; throws VBox status code on failure,
3511 * so no need to check the return value.
3512 * @param pReNative The native recompile state.
3513 * @param poff Pointer to the variable with the code buffer
3514 * position. This will be updated if we need to move
3515 * a variable from register to stack in order to
3516 * satisfy the request.
3517 * @param fRegMask Mask of acceptable registers.
3518 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3519 * registers (@c true, default) or the other way
3520 * around (@c false, for
3521 * iemNativeRegAllocTmpForGuestReg()).
3522 */
3523template<bool const a_fPreferVolatile>
3524DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3525{
3526 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3527 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3528
3529 /*
3530 * Try to find a completely unused register, preferably a call-volatile one.
3531 */
3532 uint8_t idxReg;
3533 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3534 & ~pReNative->Core.bmHstRegsWithGstShadow
3535 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3536 & fRegMask;
3537 if (fRegs)
3538 {
3539 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3540 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3541 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3542 else
3543 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3544 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3545 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3546 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3547 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3548 }
3549 else
3550 {
3551 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, fRegMask);
3552 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3553 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3554 }
3555 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3556}
3557
3558
3559/** See iemNativeRegAllocTmpExInt for details. */
3560DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3561{
3562 return iemNativeRegAllocTmpExInt<true>(pReNative, poff, fRegMask);
3563}
3564
3565
3566/** See iemNativeRegAllocTmpExInt for details. */
3567DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpExPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3568{
3569 return iemNativeRegAllocTmpExInt<false>(pReNative, poff, fRegMask);
3570}
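/*
 * Illustrative sketch (not from the original source): the Ex variants restrict the
 * allocation to a caller-supplied mask, e.g. to keep the register out of the
 * call-volatile set so it survives a helper call at the host ABI level.
 *
 * @code
 *     uint8_t const idxSafeReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                       IEMNATIVE_HST_GREG_MASK
 *                                                       & ~IEMNATIVE_REG_FIXED_MASK
 *                                                       & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 *     // ... emit the helper call and code using idxSafeReg ...
 *     iemNativeRegFreeTmp(pReNative, idxSafeReg);
 * @endcode
 */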
3571
3572
3573/** Internal templated variation of iemNativeRegAllocTmpEx. */
3574template<uint32_t const a_fRegMask, bool const a_fPreferVolatile>
3575DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3576{
3577 AssertCompile(!(a_fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3578 AssertCompile(!(a_fRegMask & IEMNATIVE_REG_FIXED_MASK));
3579
3580 /*
3581 * Try to find a completely unused register, preferably a call-volatile one.
3582 */
3583 uint8_t idxReg;
3584 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3585 & ~pReNative->Core.bmHstRegsWithGstShadow
3586 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3587 & a_fRegMask;
3588 if (fRegs)
3589 {
3590 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3591 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3592 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3593 else
3594 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3595 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3596 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3597 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3598 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3599 }
3600 else
3601 {
3602 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, a_fRegMask);
3603 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3604 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3605 }
3606 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3607}
3608
3609
3610/**
3611 * Allocates a temporary register for loading an immediate value into.
3612 *
3613 * This will emit code to load the immediate, unless there happens to be an
3614 * unused register with the value already loaded.
3615 *
3616 * The caller must not modify the returned register; it must be considered
3617 * read-only. Free using iemNativeRegFreeTmpImm.
3618 *
3619 * @returns The host register number; throws VBox status code on failure, so no
3620 * need to check the return value.
3621 * @param pReNative The native recompile state.
3622 * @param poff Pointer to the variable with the code buffer position.
3623 * @param uImm The immediate value that the register must hold upon
3624 * return.
3625 * @note Prefers volatile registers.
3626 * @note Reusing immediate values has not been implemented yet.
3627 */
3628DECL_HIDDEN_THROW(uint8_t)
3629iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm)
3630{
3631 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff);
3632 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3633 return idxReg;
3634}
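/*
 * Illustrative usage sketch (not from the original source): the immediate variant pairs
 * with iemNativeRegFreeTmpImm() and the register must be treated as read-only while held.
 *
 * @code
 *     uint8_t const idxRegLimit = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *     // ... emit compares or other uses of idxRegLimit without modifying it ...
 *     iemNativeRegFreeTmpImm(pReNative, idxRegLimit);
 * @endcode
 */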
3635
3636
3637/**
3638 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3639 * iemNativeRegAllocTmpForGuestEFlags().
3640 *
3641 * See iemNativeRegAllocTmpForGuestRegInt() for details.
3642 */
3643template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, uint32_t const a_fRegMask>
3644static uint8_t iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3645{
3646 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3647#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3648 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3649#endif
3650
3651 /*
3652 * First check if the guest register value is already in a host register.
3653 */
3654 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3655 {
3656 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3657 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3658 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3659 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3660
3661 /* It's not supposed to be allocated... */
3662 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3663 {
3664 /*
3665 * If the register will trash the guest shadow copy, try find a
3666 * completely unused register we can use instead. If that fails,
3667 * we need to disassociate the host reg from the guest reg.
3668 */
3669 /** @todo would be nice to know if preserving the register is in any way helpful. */
3670 /* If the purpose is calculations, try duplicating the register value as
3671 we'll be clobbering the shadow. */
3672 if ( a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3673 && ( ~pReNative->Core.bmHstRegs
3674 & ~pReNative->Core.bmHstRegsWithGstShadow
3675 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3676 {
3677 uint8_t const idxRegNew = iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3678
3679 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3680
3681 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3682 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3683 g_apszIemNativeHstRegNames[idxRegNew]));
3684 idxReg = idxRegNew;
3685 }
3686 /* If the current register matches the restrictions, go ahead and allocate
3687 it for the caller. */
3688 else if (a_fRegMask & RT_BIT_32(idxReg))
3689 {
3690 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3692 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3693 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3694 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n", g_apszIemNativeHstRegNames[idxReg],
3695 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3696 else
3697 {
3698 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3699 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3700 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3701 }
3702 }
3703 /* Otherwise, allocate a register that satisfies the caller and transfer
3704 the shadowing if compatible with the intended use. (This basically
3705 means the call wants a non-volatile register (RSP push/pop scenario).) */
3706 else
3707 {
3708 Assert(!(a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3709 uint8_t const idxRegNew = (a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3710 && a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3711 ? iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg))
3712 : iemNativeRegAllocTmpExPreferNonVolatile(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg));
3713 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3714 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3715 {
3716 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3717 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3718 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3719 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3720 }
3721 else
3722 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3723 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3724 g_apszIemNativeHstRegNames[idxRegNew]));
3725 idxReg = idxRegNew;
3726 }
3727 }
3728 else
3729 {
3730 /*
3731 * Oops. Shadowed guest register already allocated!
3732 *
3733 * Allocate a new register, copy the value and, if updating, the
3734 * guest shadow copy assignment to the new register.
3735 */
3736 AssertMsg( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3737 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3738 ("This shouldn't happen: idxReg=%d enmGstReg=%d a_enmIntendedUse=%s\n",
3739 idxReg, enmGstReg, s_pszIntendedUse[a_enmIntendedUse]));
3740
3741 /** @todo share register for readonly access. */
3742 uint8_t const idxRegNew = a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3743 ? iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff)
3744 : iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff);
3745
3746 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3747 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3748
3749 if RT_CONSTEXPR_IF( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3750 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3751 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3752 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3753 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3754 else
3755 {
3756 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3757 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3758 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3759 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3760 }
3761 idxReg = idxRegNew;
3762 }
3763 Assert(RT_BIT_32(idxReg) & a_fRegMask); /* See assumption in fNoVolatileRegs docs. */
3764
3765#ifdef VBOX_STRICT
3766 /* Strict builds: Check that the value is correct. */
3767 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3768#endif
3769
3770#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3771 /** @todo r=aeichner Implement for registers other than GPR as well. */
3772 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3773 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3774 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3775 && enmGstReg <= kIemNativeGstReg_GprLast)
3776 || enmGstReg == kIemNativeGstReg_MxCsr)
3777 {
3778# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3779 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3780 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3781# endif
3782 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3783 }
3784#endif
3785
3786 return idxReg;
3787 }
3788
3789 /*
3790 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3791 */
3792 uint8_t const idxRegNew = a_enmIntendedUse != kIemNativeGstRegUse_Calculation
3793 ? iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff)
3794 : iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3795
3796 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3797 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3798
3799 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3800 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3801 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3802 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3803
3804#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3805 /** @todo r=aeichner Implement for registers other than GPR as well. */
3806 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3807 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3808 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3809 && enmGstReg <= kIemNativeGstReg_GprLast)
3810 || enmGstReg == kIemNativeGstReg_MxCsr)
3811 {
3812# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3813 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3814 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3815# endif
3816 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3817 }
3818#endif
3819
3820 return idxRegNew;
3821}
3822
3823
3824/**
3825 * Allocates a temporary host general purpose register for keeping a guest
3826 * register value.
3827 *
3828 * Since we may already have a register holding the guest register value,
3829 * code will be emitted to do the loading if that's not the case. Code may also
3830 * be emitted if we have to free up a register to satisfy the request.
3831 *
3832 * @returns The host register number; throws VBox status code on failure, so no
3833 * need to check the return value.
3834 * @param pReNative The native recompile state.
3835 * @param poff Pointer to the variable with the code buffer
3836 * position. This will be updated if we need to move
3837 * a variable from register to stack in order to
3838 * satisfy the request.
3839 * @param enmGstReg The guest register that is to be updated.
3840 * @tparam a_enmIntendedUse How the caller will be using the host register.
3841 * @tparam a_fNonVolatileRegs Set if no volatile registers are allowed, clear if
3842 * any register is okay (default).
3843 * The ASSUMPTION here is that the caller has
3844 * already flushed all volatile registers,
3845 * so this is only applied if we allocate a new
3846 * register.
3847 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3848 */
3849template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, bool const a_fNonVolatileRegs>
3850DECL_FORCE_INLINE_THROW(uint8_t)
3851iemNativeRegAllocTmpForGuestRegInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3852{
3853#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3854 AssertMsg( pReNative->idxCurCall == 0
3855 || enmGstReg == kIemNativeGstReg_Pc
3856 || (a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3857 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3858 : a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3859 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3860 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3861 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3862#endif
3863
3864 if RT_CONSTEXPR_IF(!a_fNonVolatileRegs)
3865 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3866 IEMNATIVE_HST_GREG_MASK
3867 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, enmGstReg);
3868 else /* keep else, is required by MSC */
3869 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3870 IEMNATIVE_HST_GREG_MASK
3871 & ~IEMNATIVE_REG_FIXED_MASK
3872 & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK>(pReNative, poff, enmGstReg);
3873}
3874
3875/* Variants including volatile registers: */
3876
3877DECL_HIDDEN_THROW(uint8_t)
3878iemNativeRegAllocTmpForGuestRegReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3879{
3880 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, false>(pReNative, poff, enmGstReg);
3881}
3882
3883DECL_HIDDEN_THROW(uint8_t)
3884iemNativeRegAllocTmpForGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3885{
3886 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, false>(pReNative, poff, enmGstReg);
3887}
3888
3889DECL_HIDDEN_THROW(uint8_t)
3890iemNativeRegAllocTmpForGuestRegFullWrite(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3891{
3892 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, false>(pReNative, poff, enmGstReg);
3893}
3894
3895DECL_HIDDEN_THROW(uint8_t)
3896iemNativeRegAllocTmpForGuestRegCalculation(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3897{
3898 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, false>(pReNative, poff, enmGstReg);
3899}
3900
3901/* Variants excluding any volatile registers: */
3902
3903DECL_HIDDEN_THROW(uint8_t)
3904iemNativeRegAllocTmpForGuestRegReadOnlyNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3905{
3906 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, true>(pReNative, poff, enmGstReg);
3907}
3908
3909DECL_HIDDEN_THROW(uint8_t)
3910iemNativeRegAllocTmpForGuestRegUpdateNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3911{
3912 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, true>(pReNative, poff, enmGstReg);
3913}
3914
3915DECL_HIDDEN_THROW(uint8_t)
3916iemNativeRegAllocTmpForGuestRegFullWriteNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3917{
3918 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, true>(pReNative, poff, enmGstReg);
3919}
3920
3921DECL_HIDDEN_THROW(uint8_t)
3922iemNativeRegAllocTmpForGuestRegCalculationNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3923{
3924 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, true>(pReNative, poff, enmGstReg);
3925}
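/*
 * Illustrative sketch (not from the original source): the read-only variant is used when
 * the guest value is merely consumed, the update variant when the shadow association and
 * dirty tracking shall be kept so the store back to CPUMCTX can be postponed.
 *
 * @code
 *     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegReadOnly(pReNative, &off, kIemNativeGstReg_Pc);
 *     uint8_t const idxMxCsr = iemNativeRegAllocTmpForGuestRegUpdate(pReNative, &off, kIemNativeGstReg_MxCsr);
 *     // ... emit the actual operation using the two registers ...
 *     iemNativeRegFreeTmp(pReNative, idxMxCsr);
 *     iemNativeRegFreeTmp(pReNative, idxPcReg);
 * @endcode
 */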
3926
3927
3928
3929#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3930/**
3931 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3932 *
3933 * This takes additional arguments for covering liveness assertions in strict
3934 * builds, it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3935 * kIemNativeGstReg_EFlags as argument.
3936 */
3937template<IEMNATIVEGSTREGUSE const a_enmIntendedUse>
3938DECL_FORCE_INLINE_THROW(uint8_t)
3939iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3940 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3941{
3942 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3943 {
3944 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3945 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3946 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3947 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3948 uint32_t fState;
3949# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3950 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3951 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3952 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3953 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3954 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3955 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3956 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3957 ) \
3958 , ("%s - %u\n", #a_enmGstEfl, fState))
3959 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3960 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3961 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3962 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3963 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3964 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3965 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3966# undef MY_ASSERT_ONE_EFL
3967 }
3968 RT_NOREF(fPotentialCall);
3969
3970 AssertCompile(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate);
3971 if RT_CONSTEXPR_IF(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly)
3972 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ReadOnly,
3973 IEMNATIVE_HST_GREG_MASK
3974 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3975 else /* keep else, is required by MSC */
3976 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ForUpdate,
3977 IEMNATIVE_HST_GREG_MASK
3978 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3979}
3980
3981
3982DECL_HIDDEN_THROW(uint8_t)
3983iemNativeRegAllocTmpForGuestEFlagsReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3984 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3985{
3986 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ReadOnly>(pReNative, poff, fRead, fWrite, fPotentialCall);
3987}
3988
3989DECL_HIDDEN_THROW(uint8_t)
3990iemNativeRegAllocTmpForGuestEFlagsForUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3991 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3992{
3993 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ForUpdate>(pReNative, poff, fRead, fWrite, fPotentialCall);
3994}
3995
3996#endif
3997
3998
3999
4000/**
4001 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
4002 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
4003 *
4004 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
4005 */
4006DECL_FORCE_INLINE(uint8_t)
4007iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4008{
4009 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4010
4011 /*
4012 * First check if the guest register value is already in a host register.
4013 */
4014 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4015 {
4016 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4017 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4018 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4019 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4020
4021 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4022 {
4023 /*
4024 * We only do readonly use here, so easy compared to the other
4025 * variant of this code.
4026 */
4027 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4028 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4029 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4030 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4031 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4032
4033#ifdef VBOX_STRICT
4034 /* Strict builds: Check that the value is correct. */
4035 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4036#else
4037 RT_NOREF(poff);
4038#endif
4039 return idxReg;
4040 }
4041 }
4042
4043 return UINT8_MAX;
4044}
4045
4046
4047/**
4048 * Allocates a temporary host general purpose register that already holds the
4049 * given guest register value.
4050 *
4051 * The use case for this function is places where the shadowing state cannot be
4052 * modified due to branching and such. This will fail if we don't have a
4053 * current shadow copy handy or if it's incompatible. The only code that will
4054 * be emitted here is value checking code in strict builds.
4055 *
4056 * The intended use can only be readonly!
4057 *
4058 * @returns The host register number, UINT8_MAX if not present.
4059 * @param pReNative The native recompile state.
4060 * @param poff Pointer to the instruction buffer offset.
4061 * Will be updated in strict builds if a register is
4062 * found.
4063 * @param enmGstReg The guest register that is to be read.
4064 * @note In strict builds, this may throw instruction buffer growth failures.
4065 * Non-strict builds will not throw anything.
4066 * @sa iemNativeRegAllocTmpForGuestReg
4067 */
4068DECL_HIDDEN_THROW(uint8_t)
4069iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4070{
4071#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4072 AssertMsg( pReNative->idxCurCall == 0
4073 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4074 || enmGstReg == kIemNativeGstReg_Pc
4075 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4076#endif
4077 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
4078}
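/*
 * Illustrative sketch (not from the original source): unlike the allocating variants this
 * one may fail, so the caller must handle a UINT8_MAX return, e.g. by falling back to a
 * slower code path that loads the value explicitly.
 *
 * @code
 *     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *     if (idxPcReg != UINT8_MAX)
 *     {
 *         // ... use the existing shadow copy ...
 *         iemNativeRegFreeTmp(pReNative, idxPcReg);
 *     }
 *     // else: no usable shadow copy; pick a fallback strategy.
 * @endcode
 */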
4079
4080
4081#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
4082/**
4083 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
4084 * EFLAGS.
4085 *
4086 * This takes additional arguments for covering liveness assertions in strict
4087 * builds, it's otherwise the same as
4088 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
4089 * kIemNativeGstReg_EFlags as argument.
4090 *
4091 * @note The @a fWrite parameter is necessary to complete the liveness picture,
4092 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
4093 * commit. If the operation clobbers all the flags, @a fRead will be
4094 * zero, so better verify the whole picture while we're here.
4095 */
4096DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4097 uint64_t fRead, uint64_t fWrite /*=0*/)
4098{
4099 if (pReNative->idxCurCall != 0)
4100 {
4101 Assert(fRead | fWrite);
4102 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4103 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4104 uint64_t const fAll = fRead | fWrite;
4105 uint32_t fState;
4106# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
4107 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
4108 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
4109 || ( fRead & RT_BIT_64(a_enmGstEfl) \
4110 ? fWrite & RT_BIT_64(a_enmGstEfl) \
4111 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
4112 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
4113 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
4114 ) \
4115 , ("%s - %u\n", #a_enmGstEfl, fState))
4116 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
4117 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
4118 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
4119 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
4120 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
4121 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
4122 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
4123# undef MY_ASSERT_ONE_EFL
4124 }
4125 RT_NOREF(fRead);
4126 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
4127}
4128#endif
4129
4130
4131/**
4132 * Allocates argument registers for a function call.
4133 *
4134 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4135 * need to check the return value.
4136 * @param pReNative The native recompile state.
4137 * @param off The current code buffer offset.
4138 * @param cArgs The number of arguments the function call takes.
4139 */
4140DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4141{
4142 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4143 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4144 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4145 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4146
4147 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4148 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4149 else if (cArgs == 0)
4150 return true;
4151
4152 /*
4153 * Do we get lucky and all registers are free and not shadowing anything?
4154 */
4155 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4156 for (uint32_t i = 0; i < cArgs; i++)
4157 {
4158 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4159 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4160 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4161 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4162 }
4163 /*
4164 * Okay, not lucky so we have to free up the registers.
4165 */
4166 else
4167 for (uint32_t i = 0; i < cArgs; i++)
4168 {
4169 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4170 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4171 {
4172 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4173 {
4174 case kIemNativeWhat_Var:
4175 {
4176 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4177 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4178 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4179 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4180 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4181 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4182
4183 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4184 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4185 else
4186 {
4187 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4188 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4189 }
4190 break;
4191 }
4192
4193 case kIemNativeWhat_Tmp:
4194 case kIemNativeWhat_Arg:
4195 case kIemNativeWhat_rc:
4196 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4197 default:
4198 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4199 }
4200
4201 }
4202 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4203 {
4204 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4205 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4206 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4207#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4208 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4209#endif
4210 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4211 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4212 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4213 }
4214 else
4215 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4216 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4217 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4218 }
4219 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4220 return true;
4221}
4222
4223
4224DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4225
4226
4227#if 0
4228/**
4229 * Frees a register assignment of any type.
4230 *
4231 * @param pReNative The native recompile state.
4232 * @param idxHstReg The register to free.
4233 *
4234 * @note Does not update variables.
4235 */
4236DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4237{
4238 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4239 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4240 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4241 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4242 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4243 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4244 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4245 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4246 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4247 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4248 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4249 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4250 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4251 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4252
4253 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4254 /* no flushing, right:
4255 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4256 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4257 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4258 */
4259}
4260#endif
4261
4262
4263/**
4264 * Frees a temporary register.
4265 *
4266 * Any shadow copies of guest registers assigned to the host register will not
4267 * be flushed by this operation.
4268 */
4269DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4270{
4271 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4272 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4273 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4274 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4275 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4276}
4277
4278
4279/**
4280 * Frees a temporary immediate register.
4281 *
4282 * It is assumed that the caller has not modified the register, so it still holds
4283 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4284 */
4285DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4286{
4287 iemNativeRegFreeTmp(pReNative, idxHstReg);
4288}
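
/*
 * Minimal usage sketch (disabled) for the temporary register APIs above: allocate,
 * emit code that only reads the register, then free it again.  The
 * iemNativeRegAllocTmpImm() signature shown here is an assumption for illustration
 * (it is only referenced by name above); the pairing with iemNativeRegFreeTmpImm()
 * and the "do not modify the register" rule are the actual points.
 */
#if 0
static uint32_t iemNativeSketchTmpImmUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfeedface)); /* assumed signature */
    /* ... emit code that reads but never writes idxRegTmp ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegTmp); /* tells the allocator the immediate is still valid in the register */
    return off;
}
#endif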
4289
4290
4291/**
4292 * Frees a register assigned to a variable.
4293 *
4294 * The register will be disassociated from the variable.
4295 */
4296DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4297{
4298 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4299 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4300 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4301 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4302 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4303 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4304
4305 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4306 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4307 if (!fFlushShadows)
4308 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4309 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4310 else
4311 {
4312 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4313 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4314#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4315 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4316#endif
4317 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4318 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4319 uint64_t fGstRegShadows = fGstRegShadowsOld;
4320 while (fGstRegShadows)
4321 {
4322 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4323 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4324
4325 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4326 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4327 }
4328 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4329 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4330 }
4331}
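
/*
 * Sketch of the two iemNativeRegFreeVar() modes: with fFlushShadows=false the guest
 * shadow bookkeeping is left untouched (the shadow copies stay valid in the host
 * register), with fFlushShadows=true all guest register associations are dropped as
 * well.  idxHstReg below is purely illustrative.
 */
#if 0
    /* The variable is done with the register, but the guest shadows may stay: */
    iemNativeRegFreeVar(pReNative, idxHstReg, false /*fFlushShadows*/);
    /* The variable and any guest shadows in the register are both done with: */
    iemNativeRegFreeVar(pReNative, idxHstReg, true /*fFlushShadows*/);
#endif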
4332
4333
4334#if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4335/** Host CPU SIMD register names. */
4336DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4337{
4338# ifdef RT_ARCH_AMD64
4339 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4340# elif RT_ARCH_ARM64
4341 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4342 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4343# else
4344# error "port me"
4345# endif
4346};
4347#endif
4348
4349
4350/**
4351 * Frees a SIMD register assigned to a variable.
4352 *
4353 * The register will be disassociated from the variable.
4354 */
4355DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4356{
4357 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4358 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4359 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4360 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4361 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4362 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4363
4364 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4365 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4366 if (!fFlushShadows)
4367 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4368 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4369 else
4370 {
4371 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4372 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4373 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4374 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4375 uint64_t fGstRegShadows = fGstRegShadowsOld;
4376 while (fGstRegShadows)
4377 {
4378 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4379 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4380
4381 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4382 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4383 }
4384 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4385 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4386 }
4387}
4388
4389
4390/**
4391 * Reassigns a variable to a different SIMD register specified by the caller.
4392 *
4393 * @returns The new code buffer position.
4394 * @param pReNative The native recompile state.
4395 * @param off The current code buffer position.
4396 * @param idxVar The variable index.
4397 * @param idxRegOld The old host register number.
4398 * @param idxRegNew The new host register number.
4399 * @param pszCaller The caller for logging.
4400 */
4401static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4402 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4403{
4404 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4405 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4406 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4407 RT_NOREF(pszCaller);
4408
4409 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4410 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4411 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4412
4413 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4414 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4415 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4416
4417 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4418 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4420
4421 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4422 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4423 else
4424 {
4425 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4426 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4427 }
4428
4429 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4430 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4431 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4432 if (fGstRegShadows)
4433 {
4434 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4435 | RT_BIT_32(idxRegNew);
4436 while (fGstRegShadows)
4437 {
4438 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4439 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4440
4441 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4442 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4443 }
4444 }
4445
4446 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4447 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4448 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4449 return off;
4450}
4451
4452
4453/**
4454 * Moves a variable to a different register or spills it onto the stack.
4455 *
4456 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4457 * kinds can easily be recreated if needed later.
4458 *
4459 * @returns The new code buffer position.
4460 * @param pReNative The native recompile state.
4461 * @param off The current code buffer position.
4462 * @param idxVar The variable index.
4463 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4464 * call-volatile registers.
4465 */
4466DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4467 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4468{
4469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4470 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4471 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4472 Assert(!pVar->fRegAcquired);
4473 Assert(pVar->fSimdReg);
4474
4475 uint8_t const idxRegOld = pVar->idxReg;
4476 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4477 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4478 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4479 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4480 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4481 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4482 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4483 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4484 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4485 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4486
4487 /** @todo Add statistics on this.*/
4488 /** @todo Implement basic variable liveness analysis (python) so variables
4489 * can be freed immediately once no longer used. Without it we risk
4490 * trashing registers and stack slots for dead variables.
4491 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4492
4493 /*
4494 * First try move it to a different register, as that's cheaper.
4495 */
4496 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4497 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4498 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4499 if (fRegs)
4500 {
4501 /* Avoid using shadow registers, if possible. */
4502 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4503 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4504 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4505 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4506 }
4507
4508 /*
4509 * Otherwise we must spill the register onto the stack.
4510 */
4511 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4512 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4513 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4514
4515 if (pVar->cbVar == sizeof(RTUINT128U))
4516 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4517 else
4518 {
4519 Assert(pVar->cbVar == sizeof(RTUINT256U));
4520 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4521 }
4522
4523 pVar->idxReg = UINT8_MAX;
4524 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4525 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4526 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4527 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4528 return off;
4529}
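
/*
 * Sketch of how callers typically use iemNativeSimdRegMoveOrSpillStackVar(): only
 * stack variables need rescuing from a call-volatile register, the other variable
 * kinds can simply drop the register and be recreated later (this mirrors the real
 * use in iemNativeSimdRegMoveAndFreeAndFlushAtCall() below).
 */
#if 0
    if (pVar->enmKind == kIemNativeVarKind_Stack)
        off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar); /* default: keep out of volatile regs */
    else
        pVar->idxReg = UINT8_MAX;
#endif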
4530
4531
4532/**
4533 * Called right before emitting a call instruction to move anything important
4534 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4535 * optionally freeing argument variables.
4536 *
4537 * @returns New code buffer offset, UINT32_MAX on failure.
4538 * @param pReNative The native recompile state.
4539 * @param off The code buffer offset.
4540 * @param cArgs The number of arguments the function call takes.
4541 * It is presumed that the host register part of these has
4542 * been allocated as such already and won't need moving,
4543 * just freeing.
4544 * @param fKeepVars Mask of variables that should keep their register
4545 * assignments. Caller must take care to handle these.
4546 */
4547DECL_HIDDEN_THROW(uint32_t)
4548iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4549{
4550 Assert(!cArgs); RT_NOREF(cArgs);
4551
4552 /* fKeepVars will reduce this mask. */
4553 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4554
4555 /*
4556 * Move anything important out of volatile registers.
4557 */
4558 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4559#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4560 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4561#endif
4562 ;
4563
4564 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4565 if (!fSimdRegsToMove)
4566 { /* likely */ }
4567 else
4568 {
4569 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4570 while (fSimdRegsToMove != 0)
4571 {
4572 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4573 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4574
4575 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4576 {
4577 case kIemNativeWhat_Var:
4578 {
4579 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4581 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4582 Assert(pVar->idxReg == idxSimdReg);
4583 Assert(pVar->fSimdReg);
4584 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4585 {
4586 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4587 idxVar, pVar->enmKind, pVar->idxReg));
4588 if (pVar->enmKind != kIemNativeVarKind_Stack)
4589 pVar->idxReg = UINT8_MAX;
4590 else
4591 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4592 }
4593 else
4594 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4595 continue;
4596 }
4597
4598 case kIemNativeWhat_Arg:
4599 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4600 continue;
4601
4602 case kIemNativeWhat_rc:
4603 case kIemNativeWhat_Tmp:
4604 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4605 continue;
4606
4607 case kIemNativeWhat_FixedReserved:
4608#ifdef RT_ARCH_ARM64
4609 continue; /* On ARM the upper half of the virtual 256-bit register. */
4610#endif
4611
4612 case kIemNativeWhat_FixedTmp:
4613 case kIemNativeWhat_pVCpuFixed:
4614 case kIemNativeWhat_pCtxFixed:
4615 case kIemNativeWhat_PcShadow:
4616 case kIemNativeWhat_Invalid:
4617 case kIemNativeWhat_End:
4618 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4619 }
4620 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4621 }
4622 }
4623
4624 /*
4625 * Do the actual freeing.
4626 */
4627 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4628 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4629 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4630 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4631
4632 /* If there are guest register shadows in any call-volatile register, we
4633 have to clear the corresponding guest register masks for each register. */
4634 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4635 if (fHstSimdRegsWithGstShadow)
4636 {
4637 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4638 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4639 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4640 do
4641 {
4642 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4643 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4644
4645 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4646
4647#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4648 /*
4649 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4650 * to call volatile registers).
4651 */
4652 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4653 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4654 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4655#endif
4656 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4657 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4658
4659 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4660 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4661 } while (fHstSimdRegsWithGstShadow != 0);
4662 }
4663
4664 return off;
4665}
4666
4667
4668/**
4669 * Called right before emitting a call instruction to move anything important
4670 * out of call-volatile registers, free and flush the call-volatile registers,
4671 * optionally freeing argument variables.
4672 *
4673 * @returns New code buffer offset, UINT32_MAX on failure.
4674 * @param pReNative The native recompile state.
4675 * @param off The code buffer offset.
4676 * @param cArgs The number of arguments the function call takes.
4677 * It is presumed that the host register part of these has
4678 * been allocated as such already and won't need moving,
4679 * just freeing.
4680 * @param fKeepVars Mask of variables that should keep their register
4681 * assignments. Caller must take care to handle these.
4682 */
4683DECL_HIDDEN_THROW(uint32_t)
4684iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4685{
4686 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4687
4688 /* fKeepVars will reduce this mask. */
4689 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4690
4691#ifdef RT_ARCH_ARM64
4692AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4693#endif
4694
4695 /*
4696 * Move anything important out of volatile registers.
4697 */
4698 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4699 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4700 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4701#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4702 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4703#endif
4704 & ~g_afIemNativeCallRegs[cArgs];
4705
4706 fRegsToMove &= pReNative->Core.bmHstRegs;
4707 if (!fRegsToMove)
4708 { /* likely */ }
4709 else
4710 {
4711 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4712 while (fRegsToMove != 0)
4713 {
4714 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4715 fRegsToMove &= ~RT_BIT_32(idxReg);
4716
4717 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4718 {
4719 case kIemNativeWhat_Var:
4720 {
4721 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4723 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4724 Assert(pVar->idxReg == idxReg);
4725 Assert(!pVar->fSimdReg);
4726 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4727 {
4728 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4729 idxVar, pVar->enmKind, pVar->idxReg));
4730 if (pVar->enmKind != kIemNativeVarKind_Stack)
4731 pVar->idxReg = UINT8_MAX;
4732 else
4733 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4734 }
4735 else
4736 fRegsToFree &= ~RT_BIT_32(idxReg);
4737 continue;
4738 }
4739
4740 case kIemNativeWhat_Arg:
4741 AssertMsgFailed(("What?!?: %u\n", idxReg));
4742 continue;
4743
4744 case kIemNativeWhat_rc:
4745 case kIemNativeWhat_Tmp:
4746 AssertMsgFailed(("Missing free: %u\n", idxReg));
4747 continue;
4748
4749 case kIemNativeWhat_FixedTmp:
4750 case kIemNativeWhat_pVCpuFixed:
4751 case kIemNativeWhat_pCtxFixed:
4752 case kIemNativeWhat_PcShadow:
4753 case kIemNativeWhat_FixedReserved:
4754 case kIemNativeWhat_Invalid:
4755 case kIemNativeWhat_End:
4756 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4757 }
4758 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4759 }
4760 }
4761
4762 /*
4763 * Do the actual freeing.
4764 */
4765 if (pReNative->Core.bmHstRegs & fRegsToFree)
4766 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4767 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4768 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4769
4770 /* If there are guest register shadows in any call-volatile register, we
4771 have to clear the corresponding guest register masks for each register. */
4772 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4773 if (fHstRegsWithGstShadow)
4774 {
4775 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4776 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4777 fHstRegsWithGstShadow));
4778 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4779 do
4780 {
4781 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4782 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4783
4784 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4785
4786#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4787 /*
4788 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4789 * to call volatile registers).
4790 */
4791 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4792 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4793 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4794#endif
4795
4796 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4797 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4798 } while (fHstRegsWithGstShadow != 0);
4799 }
4800
4801 /*
4802 * Now for the SIMD registers, no argument support for now.
4803 */
4804 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4805
4806 return off;
4807}
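
/*
 * Sketch of the rough order of operations around a helper call; the real sequence
 * lives in iemEmitCallCommon() and related code.  The call emitter name used below is
 * an assumption for illustration and not taken from this file.
 */
#if 0
    /* Vacate and free whatever lives in call-volatile GPRs and SIMD registers. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
    /* ... load the argument registers and emit the call itself (assumed emitter): ... */
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
    /* Reload guest shadows that were flushed out of the volatile registers. */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif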
4808
4809
4810/**
4811 * Flushes a set of guest register shadow copies.
4812 *
4813 * This is usually done after calling a threaded function or a C-implementation
4814 * of an instruction.
4815 *
4816 * @param pReNative The native recompile state.
4817 * @param fGstRegs Set of guest registers to flush.
4818 */
4819DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4820{
4821 /*
4822 * Reduce the mask by what's currently shadowed
4823 */
4824 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4825 fGstRegs &= bmGstRegShadowsOld;
4826 if (fGstRegs)
4827 {
4828 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4829 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4830 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4831 if (bmGstRegShadowsNew)
4832 {
4833 /*
4834 * Partial.
4835 */
4836 do
4837 {
4838 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4839 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4840 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4841 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4842 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4843#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4844 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4845#endif
4846
4847 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4848 fGstRegs &= ~fInThisHstReg;
4849 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4850 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4851 if (!fGstRegShadowsNew)
4852 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4853 } while (fGstRegs != 0);
4854 }
4855 else
4856 {
4857 /*
4858 * Clear all.
4859 */
4860 do
4861 {
4862 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4863 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4864 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4865 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4866 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4867#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4868 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4869#endif
4870
4871 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4872 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4873 } while (fGstRegs != 0);
4874 pReNative->Core.bmHstRegsWithGstShadow = 0;
4875 }
4876 }
4877}
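
/*
 * Sketch of a typical iemNativeRegFlushGuestShadows() call: after a C-implementation
 * call that may have modified RAX and EFLAGS, the corresponding shadow copies must be
 * dropped.  The enum member names kIemNativeGstReg_GprFirst and kIemNativeGstReg_EFlags
 * are assumptions used for illustration only.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative,
                                    RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX) /* assumed name */
                                  | RT_BIT_64(kIemNativeGstReg_EFlags));                /* assumed name */
#endif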
4878
4879
4880/**
4881 * Flushes guest register shadow copies held by a set of host registers.
4882 *
4883 * This is used with the TLB lookup code for ensuring that we don't carry on
4884 * with any guest shadows in volatile registers, as these will get corrupted by
4885 * a TLB miss.
4886 *
4887 * @param pReNative The native recompile state.
4888 * @param fHstRegs Set of host registers to flush guest shadows for.
4889 */
4890DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4891{
4892 /*
4893 * Reduce the mask by what's currently shadowed.
4894 */
4895 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4896 fHstRegs &= bmHstRegsWithGstShadowOld;
4897 if (fHstRegs)
4898 {
4899 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4900 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4901 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4902 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4903 if (bmHstRegsWithGstShadowNew)
4904 {
4905 /*
4906 * Partial (likely).
4907 */
4908 uint64_t fGstShadows = 0;
4909 do
4910 {
4911 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4912 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4913 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4914 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4915#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4916 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4917#endif
4918
4919 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4920 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4921 fHstRegs &= ~RT_BIT_32(idxHstReg);
4922 } while (fHstRegs != 0);
4923 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4924 }
4925 else
4926 {
4927 /*
4928 * Clear all.
4929 */
4930 do
4931 {
4932 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4933 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4934 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4935 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4936#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4937 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4938#endif
4939
4940 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4941 fHstRegs &= ~RT_BIT_32(idxHstReg);
4942 } while (fHstRegs != 0);
4943 pReNative->Core.bmGstRegShadows = 0;
4944 }
4945 }
4946}
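
/*
 * Sketch of the TLB lookup use mentioned above: before taking a path that clobbers the
 * call-volatile registers, drop any guest shadows living in them so stale copies are
 * not trusted afterwards.
 */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif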
4947
4948
4949/**
4950 * Restores guest shadow copies in volatile registers.
4951 *
4952 * This is used after calling a helper function (think TLB miss) to restore the
4953 * register state of volatile registers.
4954 *
4955 * @param pReNative The native recompile state.
4956 * @param off The code buffer offset.
4957 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4958 * be active (allocated) w/o asserting. Hack.
4959 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4960 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4961 */
4962DECL_HIDDEN_THROW(uint32_t)
4963iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4964{
4965 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4966 if (fHstRegs)
4967 {
4968 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4969 do
4970 {
4971 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4972
4973 /* It's not fatal if a register is active holding a variable that is
4974 shadowing a guest register, ASSUMING all pending guest register
4975 writes were flushed prior to the helper call. However, we'll be
4976 emitting duplicate restores, so it wastes code space. */
4977 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4978 RT_NOREF(fHstRegsActiveShadows);
4979
4980 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4981#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4982 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4983#endif
4984 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4985 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4986 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4987
4988 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4989 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4990
4991 fHstRegs &= ~RT_BIT_32(idxHstReg);
4992 } while (fHstRegs != 0);
4993 }
4994 return off;
4995}
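
/*
 * Sketch of the fHstRegsActiveShadows parameter ("hack") described above: a caller that
 * deliberately keeps a shadowing register allocated across the helper call passes its
 * bit so the sanity assertion stays quiet; everyone else passes zero.
 */
#if 0
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, RT_BIT_32(idxRegKeptAcrossCall));
#endif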
4996
4997
4998
4999
5000/*********************************************************************************************************************************
5001* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5002*********************************************************************************************************************************/
5003
5004/**
5005 * Info about shadowed guest SIMD register values.
5006 * @see IEMNATIVEGSTSIMDREG
5007 */
5008static struct
5009{
5010 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5011 uint32_t offXmm;
5012 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5013 uint32_t offYmm;
5014 /** Name (for logging). */
5015 const char *pszName;
5016} const g_aGstSimdShadowInfo[] =
5017{
5018#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5019 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5020 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5021 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5022 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5023 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5024 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5025 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5026 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5027 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5028 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5029 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5030 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5031 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5032 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5033 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5034 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5035 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5036#undef CPUMCTX_OFF_AND_SIZE
5037};
5038AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
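
/*
 * Sketch of how the table above is consumed: offXmm/offYmm select the low and high
 * 128-bit halves of a guest SIMD register inside VMCPU when emitting loads and stores,
 * exactly as done by iemNativeSimdRegFlushPendingWrite() further down.
 */
#if 0
    off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
                                                    g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg,
                                                     g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
#endif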
5039
5040
5041/**
5042 * Frees a temporary SIMD register.
5043 *
5044 * Any shadow copies of guest registers assigned to the host register will not
5045 * be flushed by this operation.
5046 */
5047DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5048{
5049 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5050 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5051 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5052 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5053 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5054}
5055
5056
5057/**
5058 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5059 *
5060 * @returns New code buffer offset.
5061 * @param pReNative The native recompile state.
5062 * @param off Current code buffer position.
5063 * @param enmGstSimdReg The guest SIMD register to flush.
5064 */
5065DECL_HIDDEN_THROW(uint32_t)
5066iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5067{
5068 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5069
5070 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5071 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5072 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5073 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5074
5075 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5076 {
5077 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5078 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5079 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5080 }
5081
5082 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5083 {
5084 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5085 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5086 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5087 }
5088
5089 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5090 return off;
5091}
5092
5093
5094/**
5095 * Flush the given set of guest SIMD registers if marked as dirty.
5096 *
5097 * @returns New code buffer offset.
5098 * @param pReNative The native recompile state.
5099 * @param off Current code buffer position.
5100 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5101 */
5102DECL_HIDDEN_THROW(uint32_t)
5103iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5104{
5105 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5106 & fFlushGstSimdReg;
5107 if (bmGstSimdRegShadowDirty)
5108 {
5109# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5110 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5111 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5112# endif
5113
5114 do
5115 {
5116 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5117 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5118 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5119 } while (bmGstSimdRegShadowDirty);
5120 }
5121
5122 return off;
5123}
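
/*
 * Sketch of the two flush granularities: the default mask flushes every dirty guest
 * SIMD shadow, while a single register can be targeted via IEMNATIVEGSTSIMDREG_SIMD().
 * The register number 0 below is purely illustrative.
 */
#if 0
    off = iemNativeSimdRegFlushDirtyGuest(pReNative, off);                                          /* everything */
    off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)));  /* just ymm0 */
#endif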
5124
5125
5126#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5127/**
5128 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5129 *
5130 * @returns New code buffer offset.
5131 * @param pReNative The native recompile state.
5132 * @param off Current code buffer position.
5133 * @param idxHstSimdReg The host SIMD register.
5134 *
5135 * @note This doesn't do any unshadowing of guest registers from the host register.
5136 */
5137DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5138{
5139 /* We need to flush any pending guest register writes this host register shadows. */
5140 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5141 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5142 if (bmGstSimdRegShadowDirty)
5143 {
5144# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5145 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5146 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5147# endif
5148
5149 do
5150 {
5151 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5152 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5153 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5154 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5155 } while (bmGstSimdRegShadowDirty);
5156 }
5157
5158 return off;
5159}
5160#endif
5161
5162
5163/**
5164 * Locate a register, possibly freeing one up.
5165 *
5166 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5167 * failed.
5168 *
5169 * @returns Host register number on success. Returns UINT8_MAX if no registers
5170 * were found; the caller is supposed to deal with this and raise an
5171 * allocation type specific status code (if desired).
5172 *
5173 * @throws VBox status code if we run into trouble spilling a variable or
5174 * recording debug info. Does NOT throw anything if we're out of
5175 * registers, though.
5176 */
5177static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5178 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5179{
5180 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5181 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5182 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5183
5184 /*
5185 * Try a freed register that's shadowing a guest register.
5186 */
5187 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5188 if (fRegs)
5189 {
5190 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5191
5192#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5193 /*
5194 * When we have liveness information, we use it to kick out all shadowed
5195 * guest registers that will not be needed any more in this TB. If we're
5196 * lucky, this may prevent us from ending up here again.
5197 *
5198 * Note! We must consider the previous entry here so we don't free
5199 * anything that the current threaded function requires (current
5200 * entry is produced by the next threaded function).
5201 */
5202 uint32_t const idxCurCall = pReNative->idxCurCall;
5203 if (idxCurCall > 0)
5204 {
5205 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5206 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5207
5208 /* If it matches any shadowed registers. */
5209 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5210 {
5211 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5212 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5213 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5214
5215 /* See if we've got any unshadowed registers we can return now. */
5216 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5217 if (fUnshadowedRegs)
5218 {
5219 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5220 return (fPreferVolatile
5221 ? ASMBitFirstSetU32(fUnshadowedRegs)
5222 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5223 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5224 - 1;
5225 }
5226 }
5227 }
5228#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5229
5230 unsigned const idxReg = (fPreferVolatile
5231 ? ASMBitFirstSetU32(fRegs)
5232 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5233 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5234 - 1;
5235
5236 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5237 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5238 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5239 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5240
5241 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5242 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5243
5244 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5245 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5246 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5247 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5248 return idxReg;
5249 }
5250
5251 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5252
5253 /*
5254 * Try free up a variable that's in a register.
5255 *
5256 * We do two rounds here, first evacuating variables we don't need to be
5257 * saved on the stack, then in the second round move things to the stack.
5258 */
5259 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5260 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5261 {
5262 uint32_t fVars = pReNative->Core.bmVars;
5263 while (fVars)
5264 {
5265 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5266 if (pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the SIMD allocator) */
5267 {
5268 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5269 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5270 && (RT_BIT_32(idxReg) & fRegMask)
5271 && ( iLoop == 0
5272 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5273 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5274 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5275 {
5276 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5277 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5278 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5279 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5280 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5281 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5282
5283 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5284 {
5285 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5286 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5287 }
5288
5289 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5290 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5291
5292 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5293 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5294 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5295 return idxReg;
5296 }
5297 }
5298 fVars &= ~RT_BIT_32(idxVar);
5299 }
5300 }
5301
5302 AssertFailed();
5303 return UINT8_MAX;
5304}
5305
5306
5307/**
5308 * Flushes a set of guest register shadow copies.
5309 *
5310 * This is usually done after calling a threaded function or a C-implementation
5311 * of an instruction.
5312 *
5313 * @param pReNative The native recompile state.
5314 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5315 */
5316DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5317{
5318 /*
5319 * Reduce the mask by what's currently shadowed
5320 */
5321 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5322 fGstSimdRegs &= bmGstSimdRegShadows;
5323 if (fGstSimdRegs)
5324 {
5325 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5326 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5327 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5328 if (bmGstSimdRegShadowsNew)
5329 {
5330 /*
5331 * Partial.
5332 */
5333 do
5334 {
5335 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5336 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5337 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5338 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5339 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5340 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5341
5342 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5343 fGstSimdRegs &= ~fInThisHstReg;
5344 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5345 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5346 if (!fGstRegShadowsNew)
5347 {
5348 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5349 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5350 }
5351 } while (fGstSimdRegs != 0);
5352 }
5353 else
5354 {
5355 /*
5356 * Clear all.
5357 */
5358 do
5359 {
5360 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5361 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5362 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5363 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5364 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5365 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5366
5367 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5368 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5369 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5370 } while (fGstSimdRegs != 0);
5371 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5372 }
5373 }
5374}
5375
5376
5377/**
5378 * Allocates a temporary host SIMD register.
5379 *
5380 * This may emit code to save register content onto the stack in order to free
5381 * up a register.
5382 *
5383 * @returns The host register number; throws VBox status code on failure,
5384 * so no need to check the return value.
5385 * @param pReNative The native recompile state.
5386 * @param poff Pointer to the variable with the code buffer position.
5387 * This will be updated if we need to move a variable from
5388 * register to stack in order to satisfy the request.
5389 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5390 * registers (@c true, default) or the other way around
5391 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5392 */
5393DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5394{
5395 /*
5396 * Try find a completely unused register, preferably a call-volatile one.
5397 */
5398 uint8_t idxSimdReg;
5399 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5400 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5401 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5402 if (fRegs)
5403 {
5404 if (fPreferVolatile)
5405 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5406 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5407 else
5408 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5409 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5410 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5411 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5412
5413 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5414 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5415 }
5416 else
5417 {
5418 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5419 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5420 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5421 }
5422
5423 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5424 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5425}
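
/*
 * Minimal sketch of the temporary SIMD register lifetime, mirroring the GPR variant;
 * the scratch use in the middle is intentionally left abstract.
 */
#if 0
    uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit vector code using idxSimdRegTmp as scratch ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
#endif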
5426
5427
5428/**
5429 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5430 * registers.
5431 *
5432 * @returns The host register number; throws VBox status code on failure,
5433 * so no need to check the return value.
5434 * @param pReNative The native recompile state.
5435 * @param poff Pointer to the variable with the code buffer position.
5436 * This will be updated if we need to move a variable from
5437 * register to stack in order to satisfy the request.
5438 * @param fRegMask Mask of acceptable registers.
5439 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5440 * registers (@c true, default) or the other way around
5441 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5442 */
5443DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5444 bool fPreferVolatile /*= true*/)
5445{
5446 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5447 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5448
5449 /*
5450 * Try find a completely unused register, preferably a call-volatile one.
5451 */
5452 uint8_t idxSimdReg;
5453 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5454 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5455 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5456 & fRegMask;
5457 if (fRegs)
5458 {
5459 if (fPreferVolatile)
5460 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5461 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5462 else
5463 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5464 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5465 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5466 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5467
5468 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5469 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5470 }
5471 else
5472 {
5473 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5474 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5475 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5476 }
5477
5478 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5479 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5480}
5481
5482
5483/**
5484 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5485 *
5486 * @param pReNative The native recompile state.
5487 * @param idxHstSimdReg The host SIMD register to update the state for.
5488 * @param enmLoadSz The load size to set.
5489 */
5490DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5491 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5492{
5493 /* Everything valid already? -> nothing to do. */
5494 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5495 return;
5496
5497 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5498 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5499 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5500 {
5501 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5502 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5503 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5504 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5505 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5506 }
5507}
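
/*
 * Sketch of the intended state transitions: Invalid accepts whatever is loaded first,
 * and loading the missing half afterwards widens the state to 256 bits; any other
 * combination trips the assertion above.
 */
#if 0
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);  /* Invalid -> Low128 */
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128); /* Low128  -> 256    */
#endif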
5508
5509
5510static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5511 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5512{
5513 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5514 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5515 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5516 {
5517#ifdef RT_ARCH_ARM64
5518 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5519 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5520#endif
5521
5522 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5523 {
5524 switch (enmLoadSzDst)
5525 {
5526 case kIemNativeGstSimdRegLdStSz_256:
5527 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5528 break;
5529 case kIemNativeGstSimdRegLdStSz_Low128:
5530 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5531 break;
5532 case kIemNativeGstSimdRegLdStSz_High128:
5533 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5534 break;
5535 default:
5536 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5537 }
5538
5539 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5540 }
5541 }
5542 else
5543 {
5544 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5545 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5546 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5547 }
5548
5549 return off;
5550}
5551
5552
5553/**
5554 * Allocates a temporary host SIMD register for keeping a guest
5555 * SIMD register value.
5556 *
5557 * Since we may already have a register holding the guest register value,
5558 * code will be emitted to do the loading if that's not the case. Code may also
5559 * be emitted if we have to free up a register to satisfy the request.
5560 *
5561 * @returns The host register number; throws VBox status code on failure, so no
5562 * need to check the return value.
5563 * @param pReNative The native recompile state.
5564 * @param poff Pointer to the variable with the code buffer
5565 * position. This will be updated if we need to move a
5566 * variable from register to stack in order to satisfy
5567 * the request.
5568 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5569 * @param enmLoadSz Load/store size.
5570 * @param enmIntendedUse How the caller will be using the host register.
5571 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5572 * register is okay (default). The ASSUMPTION here is
5573 * that the caller has already flushed all volatile
5574 * registers, so this is only applied if we allocate a
5575 * new register.
5576 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5577 */
5578DECL_HIDDEN_THROW(uint8_t)
5579iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5580 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz,
5581 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5582 bool fNoVolatileRegs /*= false*/)
5583{
5584 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5585#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5586 AssertMsg( pReNative->idxCurCall == 0
5587 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5588 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5589 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5590 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5591 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5592 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5593#endif
5594#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5595 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5596#endif
5597 uint32_t const fRegMask = !fNoVolatileRegs
5598 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5599 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5600
5601 /*
5602 * First check if the guest register value is already in a host register.
5603 */
5604 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5605 {
5606 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5607 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5608 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5609 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5610
5611 /* It's not supposed to be allocated... */
5612 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5613 {
5614 /*
5615 * If the register will trash the guest shadow copy, try find a
5616 * completely unused register we can use instead. If that fails,
5617 * we need to disassociate the host reg from the guest reg.
5618 */
5619 /** @todo would be nice to know if preserving the register is in any way helpful. */
5620 /* If the purpose is calculations, try duplicate the register value as
5621 we'll be clobbering the shadow. */
5622 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5623 && ( ~pReNative->Core.bmHstSimdRegs
5624 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5625 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5626 {
5627 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5628
5629 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5630
5631 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5632 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5633 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5634 idxSimdReg = idxRegNew;
5635 }
5636 /* If the current register matches the restrictions, go ahead and allocate
5637 it for the caller. */
5638 else if (fRegMask & RT_BIT_32(idxSimdReg))
5639 {
5640 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5641 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5642 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5643 {
5644 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5645 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5646 else
5647 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5648 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5649 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5650 }
5651 else
5652 {
5653 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5654 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5655 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5656 }
5657 }
5658 /* Otherwise, allocate a register that satisfies the caller and transfer
5659 the shadowing if compatible with the intended use. (This basically
5660 means the call wants a non-volatile register (RSP push/pop scenario).) */
5661 else
5662 {
5663 Assert(fNoVolatileRegs);
5664 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5665 !fNoVolatileRegs
5666 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5667 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5668 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5669 {
5670 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5671 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5672 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5673 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5674 }
5675 else
5676 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5677 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5678 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5679 idxSimdReg = idxRegNew;
5680 }
5681 }
5682 else
5683 {
5684 /*
5685 * Oops. Shadowed guest register already allocated!
5686 *
5687 * Allocate a new register, copy the value and, if updating, the
5688 * guest shadow copy assignment to the new register.
5689 */
5690 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5691 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5692 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5693 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5694
5695 /** @todo share register for readonly access. */
5696 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5697 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5698
5699 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5700 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5701 else
5702 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5703
5704 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5705 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5706 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5707 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5708 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5709 else
5710 {
5711 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5712 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5713 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5714 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5715 }
5716 idxSimdReg = idxRegNew;
5717 }
5718 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5719
5720#ifdef VBOX_STRICT
5721 /* Strict builds: Check that the value is correct. */
5722 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5723 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5724#endif
5725
5726 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5727 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5728 {
5729#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5730 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5731 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5732#endif
5733
5734 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5735 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5736 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5737 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5738 else
5739 {
5740 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5741 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5742 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5743 }
5744 }
5745
5746 return idxSimdReg;
5747 }
5748
5749 /*
5750 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5751 */
5752 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5753
5754 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5755 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5756 else
5757 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5758
5759 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5760 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5761
5762 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5763 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5764 {
5765#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5766 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5767 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5768#endif
5769
5770 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5771 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5772 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5773 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5774 else
5775 {
5776 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5777 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5778 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5779 }
5780 }
5781
5782 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5783 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5784
5785 return idxRegNew;
5786}
5787
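/*
 * Illustrative usage sketch, assuming a hypothetical call site that wants to
 * read-modify-write the low 128 bits of a guest SIMD register; enmGstSimdReg
 * stands for whichever guest register that is:
 *
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, enmGstSimdReg,
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the actual SIMD operation on idxSimdReg here ...
 *      // The ForUpdate use marks the low 128 bits dirty, so the delayed register
 *      // writeback machinery stores the result back to CPUMCTX later on.
 */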
5788
5789/**
5790 * Flushes guest SIMD register shadow copies held by a set of host registers.
5791 *
5792 * This is used when calling an external helper to ensure that we don't carry any
5793 * guest shadows in volatile registers, as these would be clobbered by the callee.
5794 *
5795 * @param pReNative The native recompile state.
5796 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5797 */
5798DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5799{
5800 /*
5801 * Reduce the mask by what's currently shadowed.
5802 */
5803 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5804 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5805 if (fHstSimdRegs)
5806 {
5807 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5808 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5809 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5810 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5811 if (bmHstSimdRegsWithGstShadowNew)
5812 {
5813 /*
5814 * Partial (likely).
5815 */
5816 uint64_t fGstShadows = 0;
5817 do
5818 {
5819 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5820 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5821 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5822 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5823 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5824 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5825
5826 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5827 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5828 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5829 } while (fHstSimdRegs != 0);
5830 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5831 }
5832 else
5833 {
5834 /*
5835 * Clear all.
5836 */
5837 do
5838 {
5839 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5840 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5841 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5842 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5843 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5844 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5845
5846 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5847 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5848 } while (fHstSimdRegs != 0);
5849 pReNative->Core.bmGstSimdRegShadows = 0;
5850 }
5851 }
5852}
5853
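/*
 * Illustrative usage sketch: dropping the guest shadows held in call-volatile
 * SIMD registers just before invoking an external helper, since those registers
 * hold stale data once the helper returns:
 *
 *      iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */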
5854
5855
5856/*********************************************************************************************************************************
5857* Code emitters for flushing pending guest register writes and sanity checks *
5858*********************************************************************************************************************************/
5859
5860#ifdef VBOX_STRICT
5861/**
5862 * Does internal register allocator sanity checks.
5863 */
5864DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5865{
5866 /*
5867 * Iterate host registers building a guest shadowing set.
5868 */
5869 uint64_t bmGstRegShadows = 0;
5870 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5871 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5872 while (bmHstRegsWithGstShadow)
5873 {
5874 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5875 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5876 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5877
5878 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5879 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5880 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5881 bmGstRegShadows |= fThisGstRegShadows;
5882 while (fThisGstRegShadows)
5883 {
5884 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5885 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5886 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5887 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5888 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5889 }
5890 }
5891 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5892 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5893 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5894
5895 /*
5896 * Now the other way around, checking the guest to host index array.
5897 */
5898 bmHstRegsWithGstShadow = 0;
5899 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5900 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5901 while (bmGstRegShadows)
5902 {
5903 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5904 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5905 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5906
5907 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5908 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5909 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5910 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5911 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5912 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5913 }
5914 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5915 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5916 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5917}
5918#endif /* VBOX_STRICT */
5919
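/*
 * The invariant verified above, stated compactly: for every host register i with
 * bit i set in bmHstRegsWithGstShadow, each guest register g in
 * aHstRegs[i].fGstRegShadows must satisfy aidxGstRegShadows[g] == i, and the
 * union of all fGstRegShadows masks must equal bmGstRegShadows exactly.  A
 * minimal strict-build usage sketch:
 *
 *      #ifdef VBOX_STRICT
 *          iemNativeRegAssertSanity(pReNative); // asserts if the mapping is corrupt
 *      #endif
 */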
5920
5921/**
5922 * Flushes any delayed guest register writes.
5923 *
5924 * This must be called prior to calling CImpl functions and any helpers that use
5925 * the guest state (like raising exceptions) and such.
5926 *
5927 * @note This function does not flush any shadowing information for guest registers;
5928 * the caller must do that itself if desired.
5929 */
5930DECL_HIDDEN_THROW(uint32_t)
5931iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5932{
5933#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5934 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5935 off = iemNativeEmitPcWriteback(pReNative, off);
5936#else
5937 RT_NOREF(pReNative, fGstShwExcept);
5938#endif
5939
5940#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5941 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5942#endif
5943
5944 return iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5945}
5946
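/*
 * Illustrative sketch: callers normally go through iemNativeRegFlushPendingWrites(),
 * which ends up in the slow worker above when there is something to flush.  Passing
 * zero for both exception masks flushes the delayed PC update plus every dirty
 * general purpose and SIMD register shadow:
 *
 *      off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0 /*fGstShwExcept*/, 0 /*fGstSimdShwExcept*/);
 */
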
5947#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5948
5949# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5950
5951/**
5952 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5953 */
5954DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5955{
5956 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5957 Assert(pReNative->Core.fDebugPcInitialized);
5958
5959 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5960# ifdef RT_ARCH_AMD64
5961 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5962 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5963 pCodeBuf[off++] = 0x3b;
5964 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5965# else
5966 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5967 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5968 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5969# endif
5970
5971 uint32_t offFixup = off;
5972 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5973 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5974 iemNativeFixupFixedJump(pReNative, offFixup, off);
5975
5976 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5977 return off;
5978}
5979
5980
5981/**
5982 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5983 */
5984DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5985{
5986 if (pReNative->Core.fDebugPcInitialized)
5987 {
5988 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5989 if (pReNative->Core.offPc)
5990 {
5991 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5992 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5993 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5994 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5995 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5996 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5997 }
5998 else
5999 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6000 iemNativeRegFreeTmp(pReNative, idxPcReg);
6001 }
6002 return off;
6003}
6004
6005# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
6006
6007/**
6008 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6009 */
6010DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6011{
6012 Assert(pReNative->Core.offPc);
6013# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
6014 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
6015# else
6016 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
6017 uint8_t idxCurCall = pReNative->idxCurCall;
6018 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
6019 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
6020 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
6021 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
6022 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
6023 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
6024 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
6025
6026 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
6027
6028# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6029 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6030 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
6031# endif
6032# endif
6033
6034# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6035 /* Allocate a temporary PC register. */
6036 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6037
6038 /* Perform the addition and store the result. */
6039 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6040 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
6041# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6042 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6043# endif
6044
6045 /* Free but don't flush the PC register. */
6046 iemNativeRegFreeTmp(pReNative, idxPcReg);
6047# else
6048 /* Compare the shadow with the context value, they should match. */
6049 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6050 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6051# endif
6052
6053 pReNative->Core.offPc = 0;
6054
6055 return off;
6056}
6057
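/*
 * Illustrative sketch of the delayed PC updating scheme: rather than emitting a
 * RIP update for every recompiled instruction, the instruction lengths are
 * accumulated in pReNative->Core.offPc (done elsewhere) and only materialized
 * when an accurate RIP is needed:
 *
 *      pReNative->Core.offPc += cbInstr;               // per instruction, elsewhere
 *      ...
 *      off = iemNativeEmitPcWriteback(pReNative, off); // adds offPc to rip if non-zero
 *      // offPc is zero again afterwards, see the end of the slow worker above.
 */
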
6058#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6059
6060
6061/*********************************************************************************************************************************
6062* Code Emitters (larger snippets) *
6063*********************************************************************************************************************************/
6064
6065/**
6066 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6067 * extending to 64-bit width.
6068 *
6069 * @returns New code buffer offset on success, UINT32_MAX on failure.
6070 * @param pReNative The native recompile state.
6071 * @param off The current code buffer position.
6072 * @param idxHstReg The host register to load the guest register value into.
6073 * @param enmGstReg The guest register to load.
6074 *
6075 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6076 * that is something the caller needs to do if applicable.
6077 */
6078DECL_HIDDEN_THROW(uint32_t)
6079iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6080{
6081 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6082 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6083
6084 switch (g_aGstShadowInfo[enmGstReg].cb)
6085 {
6086 case sizeof(uint64_t):
6087 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6088 case sizeof(uint32_t):
6089 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6090 case sizeof(uint16_t):
6091 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6092#if 0 /* not present in the table. */
6093 case sizeof(uint8_t):
6094 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6095#endif
6096 default:
6097 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6098 }
6099}
6100
6101
6102/**
6103 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6104 * extending to 64-bit width, extended version.
6105 *
6106 * @returns New code buffer offset on success, UINT32_MAX on failure.
6107 * @param pCodeBuf The code buffer.
6108 * @param off The current code buffer position.
6109 * @param idxHstReg The host register to load the guest register value into.
6110 * @param enmGstReg The guest register to load.
6111 *
6112 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6113 * that is something the caller needs to do if applicable.
6114 */
6115DECL_HIDDEN_THROW(uint32_t)
6116iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6117{
6118 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6119 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6120
6121 switch (g_aGstShadowInfo[enmGstReg].cb)
6122 {
6123 case sizeof(uint64_t):
6124 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6125 case sizeof(uint32_t):
6126 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6127 case sizeof(uint16_t):
6128 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6129#if 0 /* not present in the table. */
6130 case sizeof(uint8_t):
6131 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6132#endif
6133 default:
6134#ifdef IEM_WITH_THROW_CATCH
6135 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6136#else
6137 AssertReleaseFailedReturn(off);
6138#endif
6139 }
6140}
6141
6142
6143/**
6144 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6145 *
6146 * @returns New code buffer offset on success, UINT32_MAX on failure.
6147 * @param pReNative The recompiler state.
6148 * @param off The current code buffer position.
6149 * @param idxHstSimdReg The host register to load the guest register value into.
6150 * @param enmGstSimdReg The guest register to load.
6151 * @param enmLoadSz The load size of the register.
6152 *
6153 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6154 * that is something the caller needs to do if applicable.
6155 */
6156DECL_HIDDEN_THROW(uint32_t)
6157iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6158 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6159{
6160 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6161
6162 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6163 switch (enmLoadSz)
6164 {
6165 case kIemNativeGstSimdRegLdStSz_256:
6166 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6167 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6168 case kIemNativeGstSimdRegLdStSz_Low128:
6169 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6170 case kIemNativeGstSimdRegLdStSz_High128:
6171 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6172 default:
6173 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6174 }
6175}
6176
6177#ifdef VBOX_STRICT
6178
6179/**
6180 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6181 *
6182 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6183 * Trashes EFLAGS on AMD64.
6184 */
6185DECL_FORCE_INLINE(uint32_t)
6186iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6187{
6188# ifdef RT_ARCH_AMD64
6189 /* rol reg64, 32 */
6190 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6191 pCodeBuf[off++] = 0xc1;
6192 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6193 pCodeBuf[off++] = 32;
6194
6195 /* test reg32, ffffffffh */
6196 if (idxReg >= 8)
6197 pCodeBuf[off++] = X86_OP_REX_B;
6198 pCodeBuf[off++] = 0xf7;
6199 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6200 pCodeBuf[off++] = 0xff;
6201 pCodeBuf[off++] = 0xff;
6202 pCodeBuf[off++] = 0xff;
6203 pCodeBuf[off++] = 0xff;
6204
6205 /* je/jz +1 */
6206 pCodeBuf[off++] = 0x74;
6207 pCodeBuf[off++] = 0x01;
6208
6209 /* int3 */
6210 pCodeBuf[off++] = 0xcc;
6211
6212 /* rol reg64, 32 */
6213 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6214 pCodeBuf[off++] = 0xc1;
6215 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6216 pCodeBuf[off++] = 32;
6217
6218# elif defined(RT_ARCH_ARM64)
6219 /* lsr tmp0, reg64, #32 */
6220 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6221 /* cbz tmp0, +1 */
6222 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6223 /* brk #0x1100 */
6224 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6225
6226# else
6227# error "Port me!"
6228# endif
6229 return off;
6230}
6231
6232
6233/**
6234 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6235 *
6236 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6237 * Trashes EFLAGS on AMD64.
6238 */
6239DECL_HIDDEN_THROW(uint32_t)
6240iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6241{
6242# ifdef RT_ARCH_AMD64
6243 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6244# elif defined(RT_ARCH_ARM64)
6245 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6246# else
6247# error "Port me!"
6248# endif
6249 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6250 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6251 return off;
6252}
6253
6254
6255/**
6256 * Emitting code that checks that the content of register @a idxReg is the same
6257 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6258 * instruction if that's not the case.
6259 *
6260 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6261 * Trashes EFLAGS on AMD64.
6262 */
6263DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6264 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6265{
6266#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6267 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6268 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6269 return off;
6270#endif
6271
6272# ifdef RT_ARCH_AMD64
6273 /* cmp reg, [mem] */
6274 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6275 {
6276 if (idxReg >= 8)
6277 pCodeBuf[off++] = X86_OP_REX_R;
6278 pCodeBuf[off++] = 0x38;
6279 }
6280 else
6281 {
6282 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6283 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6284 else
6285 {
6286 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6287 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6288 else
6289 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6290 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6291 if (idxReg >= 8)
6292 pCodeBuf[off++] = X86_OP_REX_R;
6293 }
6294 pCodeBuf[off++] = 0x39;
6295 }
6296 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6297
6298 /* je/jz +1 */
6299 pCodeBuf[off++] = 0x74;
6300 pCodeBuf[off++] = 0x01;
6301
6302 /* int3 */
6303 pCodeBuf[off++] = 0xcc;
6304
6305 /* For values smaller than the register size, we must check that the rest
6306 of the register is all zeros. */
6307 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6308 {
6309 /* test reg64, imm32 */
6310 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6311 pCodeBuf[off++] = 0xf7;
6312 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6313 pCodeBuf[off++] = 0;
6314 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6315 pCodeBuf[off++] = 0xff;
6316 pCodeBuf[off++] = 0xff;
6317
6318 /* je/jz +1 */
6319 pCodeBuf[off++] = 0x74;
6320 pCodeBuf[off++] = 0x01;
6321
6322 /* int3 */
6323 pCodeBuf[off++] = 0xcc;
6324 }
6325 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6326 iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6328
6329# elif defined(RT_ARCH_ARM64)
6330 /* mov TMP0, [gstreg] */
6331 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6332
6333 /* sub tmp0, tmp0, idxReg */
6334 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6335 /* cbz tmp0, +2 */
6336 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6337 /* brk #0x1000+enmGstReg */
6338 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6339 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6340
6341# else
6342# error "Port me!"
6343# endif
6344 return off;
6345}
6346
6347
6348/**
6349 * Emitting code that checks that the content of register @a idxReg is the same
6350 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6351 * instruction if that's not the case.
6352 *
6353 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6354 * Trashes EFLAGS on AMD64.
6355 */
6356DECL_HIDDEN_THROW(uint32_t)
6357iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6358{
6359#ifdef RT_ARCH_AMD64
6360 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6361#elif defined(RT_ARCH_ARM64)
6362 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6363# else
6364# error "Port me!"
6365# endif
6366 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6367}
6368
6369# ifdef RT_ARCH_AMD64
6370/**
6371 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6372 */
6373DECL_FORCE_INLINE_THROW(uint32_t)
6374iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6375{
6376 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6377 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6378 if (idxSimdReg >= 8)
6379 pbCodeBuf[off++] = X86_OP_REX_R;
6380 pbCodeBuf[off++] = 0x0f;
6381 pbCodeBuf[off++] = 0x38;
6382 pbCodeBuf[off++] = 0x29;
6383 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6384
6385 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6386 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6387 pbCodeBuf[off++] = X86_OP_REX_W
6388 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6389 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6390 pbCodeBuf[off++] = 0x0f;
6391 pbCodeBuf[off++] = 0x3a;
6392 pbCodeBuf[off++] = 0x16;
6393 pbCodeBuf[off++] = 0xeb;
6394 pbCodeBuf[off++] = 0x00;
6395
6396 /* cmp tmp0, 0xffffffffffffffff. */
6397 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6398 pbCodeBuf[off++] = 0x83;
6399 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6400 pbCodeBuf[off++] = 0xff;
6401
6402 /* je/jz +1 */
6403 pbCodeBuf[off++] = 0x74;
6404 pbCodeBuf[off++] = 0x01;
6405
6406 /* int3 */
6407 pbCodeBuf[off++] = 0xcc;
6408
6409 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6410 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6411 pbCodeBuf[off++] = X86_OP_REX_W
6412 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6413 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6414 pbCodeBuf[off++] = 0x0f;
6415 pbCodeBuf[off++] = 0x3a;
6416 pbCodeBuf[off++] = 0x16;
6417 pbCodeBuf[off++] = 0xeb;
6418 pbCodeBuf[off++] = 0x01;
6419
6420 /* cmp tmp0, 0xffffffffffffffff. */
6421 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6422 pbCodeBuf[off++] = 0x83;
6423 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6424 pbCodeBuf[off++] = 0xff;
6425
6426 /* je/jz +1 */
6427 pbCodeBuf[off++] = 0x74;
6428 pbCodeBuf[off++] = 0x01;
6429
6430 /* int3 */
6431 pbCodeBuf[off++] = 0xcc;
6432
6433 return off;
6434}
6435# endif /* RT_ARCH_AMD64 */
6436
6437
6438/**
6439 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6440 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6441 * instruction if that's not the case.
6442 *
6443 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6444 * Trashes EFLAGS on AMD64.
6445 */
6446DECL_HIDDEN_THROW(uint32_t)
6447iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6448 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6449{
6450 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6451 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6452 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6453 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6454 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6455 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6456 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6457 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6458 return off;
6459
6460# ifdef RT_ARCH_AMD64
6461 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6462 {
6463 /* movdqa vectmp0, idxSimdReg */
6464 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6465
6466 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6467
6468 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6469 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6470 }
6471
6472 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6473 {
6474 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6475 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6476
6477 /* vextracti128 vectmp0, idxSimdReg, 1 */
6478 pbCodeBuf[off++] = X86_OP_VEX3;
6479 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6480 | X86_OP_VEX3_BYTE1_X
6481 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6482 | 0x03; /* Opcode map */
6483 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6484 pbCodeBuf[off++] = 0x39;
6485 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6486 pbCodeBuf[off++] = 0x01;
6487
6488 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6489 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6490 }
6491
6492# elif defined(RT_ARCH_ARM64)
6493 /* mov vectmp0, [gstreg] */
6494 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6495
6496 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6497 {
6498 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6499 /* eor vectmp0, vectmp0, idxSimdReg */
6500 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6501 /* uaddlv vectmp0, vectmp0.16B */
6502 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6503 /* umov tmp0, vectmp0.H[0] */
6504 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6505 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6506 /* cbz tmp0, +1 */
6507 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6508 /* brk #0x1000+enmGstReg */
6509 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6510 }
6511
6512 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6513 {
6514 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6515 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6516 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6517 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6518 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6519 /* umov tmp0, (vectmp0 + 1).H[0] */
6520 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6521 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6522 /* cbz tmp0, +1 */
6523 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6524 /* brk #0x1000+enmGstReg */
6525 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6526 }
6527
6528# else
6529# error "Port me!"
6530# endif
6531
6532 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6533 return off;
6534}
6535
6536
6537/**
6538 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6539 * important bits.
6540 *
6541 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6542 * Trashes EFLAGS on AMD64.
6543 */
6544DECL_HIDDEN_THROW(uint32_t)
6545iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6546{
6547 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6548 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6549 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6550 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6551
6552# ifdef RT_ARCH_AMD64
6553 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6554
6555 /* je/jz +1 */
6556 pbCodeBuf[off++] = 0x74;
6557 pbCodeBuf[off++] = 0x01;
6558
6559 /* int3 */
6560 pbCodeBuf[off++] = 0xcc;
6561
6562# elif defined(RT_ARCH_ARM64)
6563 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6564
6565 /* b.eq +1 */
6566 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6567 /* brk #0x2000 */
6568 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6569
6570# else
6571# error "Port me!"
6572# endif
6573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6574
6575 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6576 return off;
6577}
6578
6579#endif /* VBOX_STRICT */
6580
6581
6582#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6583/**
6584 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6585 */
6586DECL_HIDDEN_THROW(uint32_t)
6587iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6588{
6589 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6590
6591 fEflNeeded &= X86_EFL_STATUS_BITS;
6592 if (fEflNeeded)
6593 {
6594# ifdef RT_ARCH_AMD64
6595 /* test dword [pVCpu + offVCpu], imm32 */
6596 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6597 if (fEflNeeded <= 0xff)
6598 {
6599 pCodeBuf[off++] = 0xf6;
6600 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6601 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6602 }
6603 else
6604 {
6605 pCodeBuf[off++] = 0xf7;
6606 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6607 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6608 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6609 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6610 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6611 }
6612
6613 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6614 pCodeBuf[off++] = 0xcc;
6615
6616 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6617
6618# else
6619 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6620 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6621 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6622# ifdef RT_ARCH_ARM64
6623 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6624 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6625# else
6626# error "Port me!"
6627# endif
6628 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6629# endif
6630 }
6631 return off;
6632}
6633#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6634
6635
6636/**
6637 * Emits a code for checking the return code of a call and rcPassUp, returning
6638 * from the code if either are non-zero.
6639 */
6640DECL_HIDDEN_THROW(uint32_t)
6641iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6642{
6643#ifdef RT_ARCH_AMD64
6644 /*
6645 * AMD64: eax = call status code.
6646 */
6647
6648 /* edx = rcPassUp */
6649 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6650# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6651 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6652# endif
6653
6654 /* edx = eax | rcPassUp */
6655 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6656 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6657 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6658 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6659
6660 /* Jump to non-zero status return path. */
6661 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6662
6663 /* done. */
6664
6665#elif RT_ARCH_ARM64
6666 /*
6667 * ARM64: w0 = call status code.
6668 */
6669 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6670
6671# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6672 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6673 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6674# endif
6675 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6676
6677 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6678
6679 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6680 ARMV8_A64_REG_X4, true /*f64Bit*/);
6681
6682#else
6683# error "port me"
6684#endif
6685 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6686 RT_NOREF_PV(idxInstr);
6687 return off;
6688}
6689
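/*
 * Illustrative sketch of the usual emit pattern around a helper call; pfnHelper
 * and idxInstr are placeholders for whatever the call site supplies:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
 *      // A non-zero status or rcPassUp takes the NonZeroRetOrPassUp exit path.
 */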
6690
6691/**
6692 * Emits a call to a CImpl function or something similar.
6693 */
6694DECL_HIDDEN_THROW(uint32_t)
6695iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6696 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6697{
6698 /* Writeback everything. */
6699 off = iemNativeRegFlushPendingWrites(pReNative, off);
6700
6701 /*
6702 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6703 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6704 */
6705 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6706 fGstShwFlush
6707 | RT_BIT_64(kIemNativeGstReg_Pc)
6708 | RT_BIT_64(kIemNativeGstReg_EFlags));
6709 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6710
6711 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6712
6713 /*
6714 * Load the parameters.
6715 */
6716#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
6717 /* Special handling for the hidden VBOXSTRICTRC pointer. */
6718 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6719 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6720 if (cAddParams > 0)
6721 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6722 if (cAddParams > 1)
6723# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6724 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam1);
6725# else
6726 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6727# endif
6728 if (cAddParams > 2)
6729# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 6
6730 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG5_GREG, uParam2);
6731# else
6732 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6733# endif
6734 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6735
6736#else
6737 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6738 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6739 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6740 if (cAddParams > 0)
6741 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6742 if (cAddParams > 1)
6743 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6744 if (cAddParams > 2)
6745# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6746 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6747# else
6748 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6749# endif
6750#endif
6751
6752 /*
6753 * Make the call.
6754 */
6755 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6756
6757#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
6758 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6759#endif
6760
6761#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6762 pReNative->Core.fDebugPcInitialized = false;
6763 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6764#endif
6765
6766 /*
6767 * Check the status code.
6768 */
6769 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6770}
6771
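/*
 * Illustrative sketch of a defer-to-CImpl style call; iemCImpl_SomeWorker and the
 * extra flush mask are hypothetical placeholders:
 *
 *      off = iemNativeEmitCImplCall(pReNative, off, idxInstr, RT_BIT_64(kIemNativeGstReg_EFlags),
 *                                   (uintptr_t)iemCImpl_SomeWorker, cbInstr, 1 /*cAddParams*/,
 *                                   uParam0, 0, 0);
 */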
6772
6773/**
6774 * Emits a call to a threaded worker function.
6775 */
6776DECL_HIDDEN_THROW(uint32_t)
6777iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6778{
6779 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6780 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6781
6782 /* We don't know what the threaded function is doing, so we must flush all pending writes. */
6783 off = iemNativeRegFlushPendingWrites(pReNative, off);
6784
6785 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6786 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6787
6788#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6789 /* The threaded function may throw / long jmp, so set current instruction
6790 number if we're counting. */
6791 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6792#endif
6793
6794 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6795
6796#ifdef RT_ARCH_AMD64
6797 /* Load the parameters and emit the call. */
6798# ifdef RT_OS_WINDOWS
6799# ifndef VBOXSTRICTRC_STRICT_ENABLED
6800 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6801 if (cParams > 0)
6802 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6803 if (cParams > 1)
6804 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6805 if (cParams > 2)
6806 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6807# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6808 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6809 if (cParams > 0)
6810 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6811 if (cParams > 1)
6812 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6813 if (cParams > 2)
6814 {
6815 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6816 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6817 }
6818 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6819# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6820# else
6821 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6822 if (cParams > 0)
6823 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6824 if (cParams > 1)
6825 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6826 if (cParams > 2)
6827 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6828# endif
6829
6830 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6831
6832# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6833 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6834# endif
6835
6836#elif RT_ARCH_ARM64
6837 /*
6838 * ARM64:
6839 */
6840# if !defined(RT_OS_WINDOWS) || !defined(VBOXSTRICTRC_STRICT_ENABLED)
6841 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6842 if (cParams > 0)
6843 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6844 if (cParams > 1)
6845 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6846 if (cParams > 2)
6847 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6848# else
6849 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6850 if (cParams > 0)
6851 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[0]);
6852 if (cParams > 1)
6853 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[1]);
6854 if (cParams > 2)
6855 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, pCallEntry->auParams[2]);
6856 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6857# endif
6858
6859 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6860
6861# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6862 off = iemNativeEmitLoadGprByBpU32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6863# endif
6864
6865#else
6866# error "port me"
6867#endif
6868
6869#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6870 pReNative->Core.fDebugPcInitialized = false;
6871 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6872#endif
6873
6874 /*
6875 * Check the status code.
6876 */
6877 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6878
6879 return off;
6880}
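
/* Rough sketch of what the above emits for a two-parameter threaded call on
   AMD64 with the SysV ABI (illustrative only; the immediate loads and the
   status check are produced by the emitter helpers and may be optimized
   differently):
        mov     rdi, <IEMNATIVE_REG_FIXED_PVMCPU>   ; pVCpu
        mov     rsi, <auParams[0]>
        mov     rdx, <auParams[1]>
        call    g_apfnIemThreadedFunctions[enmFunction]
        ; ... followed by the iemNativeEmitCheckCallRetAndPassUp status check. */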
6881
6882
6883/**
6884 * The default liveness function, matching iemNativeEmitThreadedCall.
6885 */
6886IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6887{
6888 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6889 RT_NOREF(pCallEntry);
6890}
6891
6892#ifdef VBOX_WITH_STATISTICS
6893
6894/**
6895 * Emits code to update the thread call statistics.
6896 */
6897DECL_INLINE_THROW(uint32_t)
6898iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6899{
6900 /*
6901 * Update threaded function stats.
6902 */
6903 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6904 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6905# if defined(RT_ARCH_ARM64)
6906 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6907 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6908 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6909 iemNativeRegFreeTmp(pReNative, idxTmp1);
6910 iemNativeRegFreeTmp(pReNative, idxTmp2);
6911# else
6912 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6913# endif
6914 return off;
6915}
6916
6917
6918/**
6919 * Emits code to update the TB exit reason statistics.
6920 */
6921DECL_INLINE_THROW(uint32_t)
6922iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6923{
6924 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6925 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6926 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6927 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6928 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6929
6930 return off;
6931}
6932
6933#endif /* VBOX_WITH_STATISTICS */
6934
6935/**
6936 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6937 */
6938static uint32_t
6939iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6940{
6941 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6942 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6943
6944 /* Jump to ReturnBreak if the return register is NULL. */
6945 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6946 true /*f64Bit*/, offReturnBreak);
6947
6948 /* Okay, continue executing the next TB. */
6949 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6950 return off;
6951}
6952
6953
6954/**
6955 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6956 */
6957static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6958{
6959 /* set the return status */
6960 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6961}
6962
6963
6964/**
6965 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6966 */
6967static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6968{
6969 /* set the return status */
6970 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6971}
6972
6973
6974/**
6975 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6976 */
6977static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6978{
6979 /* set the return status */
6980 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6981}
6982
6983
6984/**
6985 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6986 */
6987static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6988{
6989 /*
6990 * Generate the rc + rcPassUp fiddling code.
6991 */
6992 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6993#ifdef RT_ARCH_AMD64
6994# ifdef RT_OS_WINDOWS
6995# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6996 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6997# endif
6998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6999 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7000# else
7001 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7002 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7003# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7004 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7005# endif
7006# endif
7007# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7008 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7009# endif
7010
7011#else
7012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7013 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7014 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7015#endif
7016
7017 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7018 return off;
7019}
7020
7021
7022/**
7023 * Emits a standard epilog.
7024 */
7025static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7026{
7027 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7028
7029 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
7030 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7031
7032 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7033 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7034
7035 /*
7036 * Restore registers and return.
7037 */
7038#ifdef RT_ARCH_AMD64
7039 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7040
7041 /* Reposition rsp at the r15 restore point. */
7042 pbCodeBuf[off++] = X86_OP_REX_W;
7043 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7044 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7045 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7046
7047 /* Pop non-volatile registers and return */
7048 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7049 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7050 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7051 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7052 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7053 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7054 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7055 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7056# ifdef RT_OS_WINDOWS
7057 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7058 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7059# endif
7060 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7061 pbCodeBuf[off++] = 0xc9; /* leave */
7062 pbCodeBuf[off++] = 0xc3; /* ret */
7063 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7064
7065#elif RT_ARCH_ARM64
7066 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7067
7068 /* ldp x19, x20, [sp, #(IEMNATIVE_FRAME_VAR_SIZE+IEMNATIVE_FRAME_ALIGN_SIZE)]! ; Unallocate the variable space and restore x19+x20. */
7069 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_ALIGN_SIZE < 64*8);
7070 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7071 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7072 (IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_ALIGN_SIZE) / 8);
7073 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7074 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7075 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7076 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7077 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7078 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7079 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7080 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7081 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7082 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7083 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7084 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7085
7086 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7087 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7088 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7089 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7090
7091 /* retab / ret */
7092# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7093 if (1)
7094 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7095 else
7096# endif
7097 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7098
7099#else
7100# error "port me"
7101#endif
7102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7103
7104 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
7105 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7106
7107 return off;
7108}
7109
7110
7111
7112/*********************************************************************************************************************************
7113* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7114*********************************************************************************************************************************/
7115
7116/**
7117 * Internal worker that allocates a variable with kind set to
7118 * kIemNativeVarKind_Invalid and no current stack allocation.
7119 *
7120 * The kind will either be set by the caller or later when the variable is first
7121 * assigned a value.
7122 *
7123 * @returns Unpacked index.
7124 * @internal
7125 */
7126DECL_INLINE_THROW(uint8_t) iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7127{
7128 Assert(cbType > 0 && cbType <= 64);
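 /* Find the lowest clear bit in the variable bitmap, i.e. the first free slot.
    E.g. with bmVars == 0x7 (vars 0..2 in use), ~bmVars has bit 3 as its lowest
    set bit, ASMBitFirstSetU32 returns the 1-based index 4 and idxVar becomes 3. */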
7129 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7130 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7131
7132 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7133
7134 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[idxVar]; /* VS 2019 gets a bit weird on us otherwise. */
7135#if 0
7136 pVar->cbVar = cbType;
7137 pVar->enmKind = kIemNativeVarKind_Invalid;
7138 pVar->fRegAcquired = false;
7139 pVar->fSimdReg = false;
7140 pVar->idxReg = UINT8_MAX;
7141 pVar->uArgNo = UINT8_MAX;
7142 pVar->idxStackSlot = UINT8_MAX;
7143 pVar->idxReferrerVar = UINT8_MAX;
7144 pVar->u.uValue = 0;
7145#else
7146 /* Neither clang 15 nor VC++ 2019 is able to generate this from the above. */
7147 AssertCompileMemberOffset(IEMNATIVEVAR, cbVar, 1);
7148 AssertCompile((int)kIemNativeVarKind_Invalid == 0);
7149 pVar->u32Init0 = (uint32_t)cbType << 8;
7150 pVar->u32Init1 = UINT32_MAX;
7151 pVar->u.uValue = 0;
7152#endif
7153 return idxVar;
7154}
7155
7156
7157/**
7158 * Internal worker that allocates an argument variable w/o setting enmKind.
7159 *
7160 * @returns Unpacked index.
7161 * @internal
7162 */
7163static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7164{
7165 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7166 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7167 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7168
7169 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7170 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7171 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7172 return idxVar;
7173}
7174
7175
7176/**
7177 * Gets the stack slot for a stack variable, allocating one if necessary.
7178 *
7179 * Calling this function implies that the stack slot will contain a valid
7180 * variable value. The caller deals with any register currently assigned to the
7181 * variable, typically by spilling it into the stack slot.
7182 *
7183 * @returns The stack slot number.
7184 * @param pReNative The recompiler state.
7185 * @param idxVar The variable.
7186 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7187 */
7188DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7189{
7190 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7191 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7192 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7193
7194 /* Already got a slot? */
7195 uint8_t const idxStackSlot = pVar->idxStackSlot;
7196 if (idxStackSlot != UINT8_MAX)
7197 {
7198 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7199 return idxStackSlot;
7200 }
7201
7202 /*
7203 * A single slot is easy to allocate.
7204 * Allocate them from the top end, closest to BP, to reduce the displacement.
7205 */
7206 if (pVar->cbVar <= sizeof(uint64_t))
7207 {
7208 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7209 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7210 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7211 pVar->idxStackSlot = (uint8_t)iSlot;
7212 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7213 return (uint8_t)iSlot;
7214 }
7215
7216 /*
7217 * We need more than one stack slot.
7218 *
7219 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7220 */
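 /* Worked example: a 32 byte variable (RTUINT256U) needs four slots; ASMBitLastSetU32(32)
    returns 6, so fBitAlignMask = RT_BIT_32(6 - 4) - 1 = 3 (i.e. 4-slot alignment) and
    fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf (four consecutive slot bits). */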
7221 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7222 Assert(pVar->cbVar <= 64);
7223 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7224 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7225 uint32_t bmStack = pReNative->Core.bmStack;
7226 while (bmStack != UINT32_MAX)
7227 {
7228 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7229 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7230 iSlot = (iSlot - 1) & ~fBitAlignMask;
7231 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7232 {
7233 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7234 pVar->idxStackSlot = (uint8_t)iSlot;
7235 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7236 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7237 return (uint8_t)iSlot;
7238 }
7239
7240 bmStack |= (fBitAllocMask << iSlot);
7241 }
7242 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7243}
7244
7245
7246/**
7247 * Changes the variable to a stack variable.
7248 *
7249 * Currently this is only possible to do the first time the variable is used;
7250 * switching later could be implemented but isn't done.
7251 *
7252 * @param pReNative The recompiler state.
7253 * @param idxVar The variable.
7254 * @throws VERR_IEM_VAR_IPE_2
7255 */
7256DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7257{
7258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7259 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7260 if (pVar->enmKind != kIemNativeVarKind_Stack)
7261 {
7262 /* We could in theory transition from immediate to stack as well, but it
7263 would involve the caller doing work storing the value on the stack. So,
7264 till that's required we only allow transition from invalid. */
7265 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7266 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7267 pVar->enmKind = kIemNativeVarKind_Stack;
7268
7269 /* Note! We don't allocate a stack slot here, that's only done when a
7270 slot is actually needed to hold a variable value. */
7271 }
7272}
7273
7274
7275/**
7276 * Sets the variable to a constant value.
7277 *
7278 * This does not require stack storage as we know the value and can always
7279 * reload it, unless of course it's referenced.
7280 *
7281 * @param pReNative The recompiler state.
7282 * @param idxVar The variable.
7283 * @param uValue The immediate value.
7284 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7285 */
7286DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7287{
7288 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7289 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7290 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7291 {
7292 /* Only simple transitions for now. */
7293 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7294 pVar->enmKind = kIemNativeVarKind_Immediate;
7295 }
7296 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7297
7298 pVar->u.uValue = uValue;
7299 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7300 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7301 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7302}
7303
7304
7305/**
7306 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7307 *
7308 * This does not require stack storage as we know the value and can always
7309 * reload it. Loading is postponed till needed.
7310 *
7311 * @param pReNative The recompiler state.
7312 * @param idxVar The variable. Unpacked.
7313 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7314 *
7315 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7316 * @internal
7317 */
7318static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7319{
7320 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7321 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7322
7323 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7324 {
7325 /* Only simple transitions for now. */
7326 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7327 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7328 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7329 }
7330 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7331
7332 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7333
7334 /* Update the other variable, ensure it's a stack variable. */
7335 /** @todo handle variables with const values... that'll go boom now. */
7336 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7337 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7338}
7339
7340
7341/**
7342 * Sets the variable to a reference (pointer) to a guest register reference.
7343 *
7344 * This does not require stack storage as we know the value and can always
7345 * reload it. Loading is postponed till needed.
7346 *
7347 * @param pReNative The recompiler state.
7348 * @param idxVar The variable.
7349 * @param enmRegClass The class of guest registers to reference.
7350 * @param idxReg The register within @a enmRegClass to reference.
7351 *
7352 * @throws VERR_IEM_VAR_IPE_2
7353 */
7354DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7355 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7356{
7357 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7358 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7359
7360 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7361 {
7362 /* Only simple transitions for now. */
7363 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7364 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7365 }
7366 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7367
7368 pVar->u.GstRegRef.enmClass = enmRegClass;
7369 pVar->u.GstRegRef.idx = idxReg;
7370}
7371
7372
7373DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7374{
7375 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7376}
7377
7378
7379DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7380{
7381 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7382
7383 /* Since we're using a generic uint64_t value type, we must truncate it if
7384 the variable is smaller, otherwise we may end up with a too large value when
7385 scaling up an imm8 w/ sign-extension.
7386
7387 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7388 in the bios, bx=1) when running on arm, because clang expects 16-bit
7389 register parameters to have bits 16 and up set to zero. Instead of
7390 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7391 CF value in the result. */
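 /* E.g. for cbType == 2 and an incoming sign-extended uValue of 0xffffffffffffffff (-1),
    the switch below reduces it to 0x000000000000ffff before the constant is recorded. */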
7392 switch (cbType)
7393 {
7394 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7395 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7396 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7397 }
7398 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7399 return idxVar;
7400}
7401
7402
7403DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7404{
7405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7406 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7407 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7408 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7409 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7410 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7411
7412 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7413 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7414 return idxArgVar;
7415}
7416
7417
7418DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7419{
7420 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7421 /* Don't set to stack now, leave that to the first use as for instance
7422 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7423 return idxVar;
7424}
7425
7426
7427DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7428{
7429 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7430
7431 /* Since we're using a generic uint64_t value type, we must truncate it if
7432 the variable is smaller, otherwise we may end up with a too large value when
7433 scaling up an imm8 w/ sign-extension. */
7434 switch (cbType)
7435 {
7436 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7437 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7438 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7439 }
7440 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7441 return idxVar;
7442}
7443
7444
7445DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7446 uint8_t cbType, uint8_t idxVarOther)
7447{
7448 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7449 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7450
7451 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarOther, poff);
7452 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7453
7454/** @todo combine MOV and AND using MOVZX/similar. */
7455 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7456
7457 /* Truncate the value to this variables size. */
7458 switch (cbType)
7459 {
7460 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7461 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7462 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7463 }
7464
7465 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7466 iemNativeVarRegisterRelease(pReNative, idxVar);
7467 return idxVar;
7468}
7469
7470
7471/**
7472 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7473 * fixed till we call iemNativeVarRegisterRelease.
7474 *
7475 * @returns The host register number.
7476 * @param pReNative The recompiler state.
7477 * @param idxVar The variable.
7478 * @param poff Pointer to the instruction buffer offset.
7479 * In case a register needs to be freed up or the value
7480 * loaded off the stack.
7481 * @param idxRegPref Preferred register number or UINT8_MAX.
7482 *
7483 * @tparam a_fInitialized Set if the variable must already have been
7484 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7485 * if this is not the case.
7486 * @tparam a_fWithRegPref If idxRegPref is valid.
7487 *
7488 * @note Must not modify the host status flags!
7489 */
7490template<bool const a_fInitialized, bool const a_fWithRegPref>
7491DECL_FORCE_INLINE_THROW(uint8_t)
7492iemNativeVarRegisterAcquireSlowInt(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7493{
7494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7495 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7496 Assert(pVar->cbVar <= 8);
7497 Assert(!pVar->fRegAcquired);
7498 Assert(!a_fWithRegPref || idxRegPref < RT_ELEMENTS(pReNative->Core.aHstRegs));
7499
7500 /* This slow code path only handles the case where no register has been
7501 allocated for the variable yet. */
7502 Assert(pVar->idxReg == UINT8_MAX);
7503
7504 /*
7505 * If the kind of variable has not yet been set, default to 'stack'.
7506 */
7507 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7508 && pVar->enmKind < kIemNativeVarKind_End);
7509 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7510 iemNativeVarSetKindToStack(pReNative, idxVar);
7511
7512 /*
7513 * We have to allocate a register for the variable, even if it's a stack one,
7514 * as we don't know whether modifications are being made to it before it's
7515 * finalized (todo: analyze and insert hints about that?).
7516 *
7517 * If we can, we try to get the correct register for argument variables. This
7518 * assumes that most argument variables are fetched as close as possible
7519 * to the actual call, so that there aren't any interfering hidden calls
7520 * (memory accesses, etc.) in between.
7521 *
7522 * If we cannot, or it's a local variable, we make sure no argument registers
7523 * that will be used by this MC block are allocated here, and we always
7524 * prefer non-volatile registers to avoid needing to spill stuff for internal
7525 * calls.
7526 */
7527 /** @todo Detect too early argument value fetches and warn about hidden
7528 * calls causing less optimal code to be generated in the python script. */
7529
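 /* Summary of the selection order implemented below: (1) an argument variable whose
    designated call register is still free gets exactly that register; (2) if the caller
    supplied a preferred register and it is free, that register is used; (3) otherwise
    pick a free register that isn't fixed, isn't shadowing a guest register and isn't
    needed for call arguments, preferring non-volatile ones; (4) if none is free,
    iemNativeRegAllocFindFree evicts one. */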
7530 uint8_t idxReg;
7531 uint8_t const uArgNo = pVar->uArgNo;
7532 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7533 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7534 {
7535 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7536
7537#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7538 /* Writeback any dirty shadow registers we are about to unshadow. */
7539 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7540#endif
7541
7542 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7543 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7544 }
7545 else if ( !a_fWithRegPref
7546 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7547 {
7548 /** @todo there must be a better way for this and boot cArgsX? */
7549 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7550 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7551 & ~pReNative->Core.bmHstRegsWithGstShadow
7552 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7553 & fNotArgsMask;
7554 if (fRegs)
7555 {
7556 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7557 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7558 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7559 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7560 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7561 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7562 }
7563 else
7564 {
7565 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7566 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7567 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7568 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7569 }
7570 }
7571 else
7572 {
7573 idxReg = idxRegPref;
7574 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7575 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7576 }
7577 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7578 pVar->idxReg = idxReg;
7579 pVar->fSimdReg = false;
7580
7581 /*
7582 * Load it off the stack if we've got a stack slot.
7583 */
7584 uint8_t const idxStackSlot = pVar->idxStackSlot;
7585 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7586 {
7587 Assert(a_fInitialized);
7588 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7589 switch (pVar->cbVar)
7590 {
7591 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7592 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7593 case 3: AssertFailed(); RT_FALL_THRU();
7594 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7595 default: AssertFailed(); RT_FALL_THRU();
7596 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7597 }
7598 }
7599 else
7600 {
7601 Assert(idxStackSlot == UINT8_MAX);
7602 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7603 AssertStmt(!a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7604 else
7605 {
7606 /*
7607 * Convert from immediate to stack/register. This is currently only
7608 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7609 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7610 */
7611 AssertStmt(a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7612 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7613 idxVar, idxReg, pVar->u.uValue));
7614 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7615 pVar->enmKind = kIemNativeVarKind_Stack;
7616 }
7617 }
7618
7619 pVar->fRegAcquired = true;
7620 return idxReg;
7621}
7622
7623
7624/** See iemNativeVarRegisterAcquireSlowInt for details. */
7625DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7626{
7627 /* very likely */
7628 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 0]);
7629 return iemNativeVarRegisterAcquireSlowInt<false, false>(pReNative, idxVar, poff, UINT8_MAX);
7630}
7631
7632
7633/** See iemNativeVarRegisterAcquireSlowInt for details. */
7634DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireInitedSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7635{
7636 /* even more likely */
7637 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 2]);
7638 return iemNativeVarRegisterAcquireSlowInt<true, false>(pReNative, idxVar, poff, UINT8_MAX);
7639}
7640
7641
7642/** See iemNativeVarRegisterAcquireSlowInt for details. */
7643DECL_HIDDEN_THROW(uint8_t)
7644iemNativeVarRegisterAcquireWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7645{
7646 /* unused */
7647 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 4]);
7648 return iemNativeVarRegisterAcquireSlowInt<false, true>(pReNative, idxVar, poff, idxRegPref);
7649}
7650
7651
7652/** See iemNativeVarRegisterAcquireSlowInt for details. */
7653DECL_HIDDEN_THROW(uint8_t)
7654iemNativeVarRegisterAcquireInitedWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7655{
7656 /* very very likely */
7657 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 6]);
7658 return iemNativeVarRegisterAcquireSlowInt<true, true>(pReNative, idxVar, poff, idxRegPref);
7659}
7660
7661
7662/**
7663 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7664 * fixed till we call iemNativeVarRegisterRelease.
7665 *
7666 * @returns The host register number.
7667 * @param pReNative The recompiler state.
7668 * @param idxVar The variable.
7669 * @param poff Pointer to the instruction buffer offset.
7670 * In case a register needs to be freed up or the value
7671 * loaded off the stack.
7672 * @param fInitialized Set if the variable must already have been initialized.
7673 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7674 * the case.
7675 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7676 */
7677/** @todo Create variants for the last two params like we've done for the
7678 * GPR variant? */
7679DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7680 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7681{
7682 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7683 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7684 Assert( pVar->cbVar == sizeof(RTUINT128U)
7685 || pVar->cbVar == sizeof(RTUINT256U));
7686 Assert(!pVar->fRegAcquired);
7687
7688/** @todo inline this bit? */
7689 uint8_t idxReg = pVar->idxReg;
7690 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7691 {
7692 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7693 && pVar->enmKind < kIemNativeVarKind_End);
7694 pVar->fRegAcquired = true;
7695 return idxReg;
7696 }
7697
7698 /*
7699 * If the kind of variable has not yet been set, default to 'stack'.
7700 */
7701 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7702 && pVar->enmKind < kIemNativeVarKind_End);
7703 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7704 iemNativeVarSetKindToStack(pReNative, idxVar);
7705
7706 /*
7707 * We have to allocate a register for the variable, even if it's a stack one,
7708 * as we don't know whether modifications are being made to it before it's
7709 * finalized (todo: analyze and insert hints about that?).
7710 *
7711 * If we can, we try to get the correct register for argument variables. This
7712 * assumes that most argument variables are fetched as close as possible
7713 * to the actual call, so that there aren't any interfering hidden calls
7714 * (memory accesses, etc.) in between.
7715 *
7716 * If we cannot, or it's a local variable, we make sure no argument registers
7717 * that will be used by this MC block are allocated here, and we always
7718 * prefer non-volatile registers to avoid needing to spill stuff for internal
7719 * calls.
7720 */
7721 /** @todo Detect too early argument value fetches and warn about hidden
7722 * calls causing less optimal code to be generated in the python script. */
7723
7724 uint8_t const uArgNo = pVar->uArgNo;
7725 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7726
7727 /* SIMD is a bit simpler for now because there is no support for arguments. */
7728 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7729 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7730 {
7731 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7732 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7733 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7734 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7735 & fNotArgsMask;
7736 if (fRegs)
7737 {
7738 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7739 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7740 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7741 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7742 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7743 }
7744 else
7745 {
7746 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7747 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7748 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7749 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7750 }
7751 }
7752 else
7753 {
7754 idxReg = idxRegPref;
7755 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7756 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7757 }
7758 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7759 pVar->idxReg = idxReg;
7760 pVar->fSimdReg = true;
7761
7762 /*
7763 * Load it off the stack if we've got a stack slot.
7764 */
7765 uint8_t const idxStackSlot = pVar->idxStackSlot;
7766 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7767 {
7768 Assert(fInitialized);
7769 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7770 switch (pVar->cbVar)
7771 {
7772 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7773 default: AssertFailed(); RT_FALL_THRU();
7774 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7775 }
7776 }
7777 else
7778 {
7779 Assert(idxStackSlot == UINT8_MAX);
7780 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7781 }
7782 pVar->fRegAcquired = true;
7783 return idxReg;
7784}
7785
7786
7787/**
7788 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7789 * guest register.
7790 *
7791 * This function makes sure there is a register for it and sets it to be the
7792 * current shadow copy of @a enmGstReg.
7793 *
7794 * @returns The host register number.
7795 * @param pReNative The recompiler state.
7796 * @param idxVar The variable.
7797 * @param enmGstReg The guest register this variable will be written to
7798 * after this call.
7799 * @param poff Pointer to the instruction buffer offset.
7800 * In case a register needs to be freed up or if the
7801 * variable content needs to be loaded off the stack.
7802 *
7803 * @note We DO NOT expect @a idxVar to be an argument variable, because
7804 * this function is only used in the commit stage of an
7805 * instruction.
7806 */
7807DECL_HIDDEN_THROW(uint8_t)
7808iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7809{
7810 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7811 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7812 Assert(!pVar->fRegAcquired);
7813 AssertMsgStmt( pVar->cbVar <= 8
7814 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7815 || pVar->enmKind == kIemNativeVarKind_Stack),
7816 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7817 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7818 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7819
7820 /*
7821 * This shouldn't ever be used for arguments, unless it's in a weird else
7822 * branch that doesn't do any calling and even then it's questionable.
7823 *
7824 * However, in case someone writes crazy wrong MC code and does register
7825 * updates before making calls, just use the regular register allocator to
7826 * ensure we get a register suitable for the intended argument number.
7827 */
7828 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7829
7830 /*
7831 * If there is already a register for the variable, we transfer/set the
7832 * guest shadow copy assignment to it.
7833 */
7834 uint8_t idxReg = pVar->idxReg;
7835 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7836 {
7837#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7838 AssertCompile(kIemNativeGstReg_GprFirst == 0);
7839 if (enmGstReg <= kIemNativeGstReg_GprLast)
7840 {
7841# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7842 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7843 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7844# endif
7845 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7846 }
7847#endif
7848
7849 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7850 {
7851 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7852 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7853 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7854 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7855 }
7856 else
7857 {
7858 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7859 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7860 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7861 }
7862 pVar->fRegAcquired = true;
7863 return idxReg;
7864 }
7865 Assert(pVar->uArgNo == UINT8_MAX);
7866
7867 /*
7868 * Because this is supposed to be the commit stage, we just tag along with the
7869 * temporary register allocator and upgrade it to a variable register.
7870 */
7871 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7872 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7873 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7874 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7875 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7876 pVar->idxReg = idxReg;
7877
7878 /*
7879 * Now we need to load the register value.
7880 */
7881 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7882 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7883 else
7884 {
7885 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7886 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7887 switch (pVar->cbVar)
7888 {
7889 case sizeof(uint64_t):
7890 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7891 break;
7892 case sizeof(uint32_t):
7893 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7894 break;
7895 case sizeof(uint16_t):
7896 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7897 break;
7898 case sizeof(uint8_t):
7899 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7900 break;
7901 default:
7902 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7903 }
7904 }
7905
7906 pVar->fRegAcquired = true;
7907 return idxReg;
7908}
7909
7910
7911/**
7912 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7913 *
7914 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7915 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7916 * requirement of flushing anything in volatile host registers when making a
7917 * call.
7918 *
7919 * @returns New @a off value.
7920 * @param pReNative The recompiler state.
7921 * @param off The code buffer position.
7922 * @param fHstGprNotToSave Set of GPRs not to save & restore.
7923 */
7924DECL_HIDDEN_THROW(uint32_t)
7925iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
7926{
7927 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
7928 if (fHstRegs)
7929 {
7930 do
7931 {
7932 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7933 fHstRegs &= ~RT_BIT_32(idxHstReg);
7934
7935 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7936 {
7937 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7939 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7940 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7941 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7942 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7943 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7944 {
7945 case kIemNativeVarKind_Stack:
7946 {
7947 /* Temporarily spill the variable register. */
7948 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7949 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7950 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7951 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7952 continue;
7953 }
7954
7955 case kIemNativeVarKind_Immediate:
7956 case kIemNativeVarKind_VarRef:
7957 case kIemNativeVarKind_GstRegRef:
7958 /* It is weird to have any of these loaded at this point. */
7959 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7960 continue;
7961
7962 case kIemNativeVarKind_End:
7963 case kIemNativeVarKind_Invalid:
7964 break;
7965 }
7966 AssertFailed();
7967 }
7968 else
7969 {
7970 /*
7971 * Allocate a temporary stack slot and spill the register to it.
7972 */
7973 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7974 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7975 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7976 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7977 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7978 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7979 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7980 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7981 }
7982 } while (fHstRegs);
7983 }
7984
7985 /*
7986 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7987 * which would be more difficult anyway since they span multiple stack slots and come in different sizes
7988 * (besides, we only have a limited number of slots at the moment).
7989 *
7990 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7991 * the callee. The call below asserts that the registers were written back earlier and are not in the dirty state.
7992 */
7993 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7994
7995 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
7996 if (fHstRegs)
7997 {
7998 do
7999 {
8000 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8001 fHstRegs &= ~RT_BIT_32(idxHstReg);
8002
8003 /* Fixed reserved and temporary registers don't need saving. */
8004 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved
8005 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp); included below */
8006 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8007
8008 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8009 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8010 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8011 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8012 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8013 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8014 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8015 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8016 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8017 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8018 {
8019 case kIemNativeVarKind_Stack:
8020 {
8021 /* Temporarily spill the variable register. */
8022 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8023 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8024 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8025 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8026 if (cbVar == sizeof(RTUINT128U))
8027 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8028 else
8029 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8030 continue;
8031 }
8032
8033 case kIemNativeVarKind_Immediate:
8034 case kIemNativeVarKind_VarRef:
8035 case kIemNativeVarKind_GstRegRef:
8036 /* It is weird to have any of these loaded at this point. */
8037 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8038 continue;
8039
8040 case kIemNativeVarKind_End:
8041 case kIemNativeVarKind_Invalid:
8042 break;
8043 }
8044 AssertFailed();
8045 } while (fHstRegs);
8046 }
8047 return off;
8048}
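
/* Typical usage sketch around a TLB-miss helper call (illustrative only; the actual
   emitters in this file may pass non-zero register masks):
       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstGprNotToSave*/);
       ... load helper arguments and emit the call ...
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstGprNotToSave*/);
   optionally followed by iemNativeRegRestoreGuestShadowsInVolatileRegs() as mentioned in
   the function description above. */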
8049
8050
8051/**
8052 * Emit code to restore volatile registers after a call to a helper.
8053 *
8054 * @returns New @a off value.
8055 * @param pReNative The recompiler state.
8056 * @param off The code buffer position.
8057 * @param fHstGprNotToSave Set of registers not to save & restore.
8058 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8059 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8060 */
8061DECL_HIDDEN_THROW(uint32_t)
8062iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
8063{
8064 /*
8065 * GPRs
8066 */
8067 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
8068 if (fHstRegs)
8069 {
8070 do
8071 {
8072 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8073 fHstRegs &= ~RT_BIT_32(idxHstReg);
8074
8075 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8076 {
8077 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8078 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8079 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8080 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8081 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8082 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8083 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8084 {
8085 case kIemNativeVarKind_Stack:
8086 {
8087 /* Unspill the variable register. */
8088 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8089 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8090 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8091 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8092 continue;
8093 }
8094
8095 case kIemNativeVarKind_Immediate:
8096 case kIemNativeVarKind_VarRef:
8097 case kIemNativeVarKind_GstRegRef:
8098 /* It is weird to have any of these loaded at this point. */
8099 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8100 continue;
8101
8102 case kIemNativeVarKind_End:
8103 case kIemNativeVarKind_Invalid:
8104 break;
8105 }
8106 AssertFailed();
8107 }
8108 else
8109 {
8110 /*
8111 * Restore from temporary stack slot.
8112 */
8113 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8114 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8115 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8116 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8117
8118 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8119 }
8120 } while (fHstRegs);
8121 }
8122
8123 /*
8124 * SIMD registers.
8125 */
8126 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8127 if (fHstRegs)
8128 {
8129 do
8130 {
8131 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8132 fHstRegs &= ~RT_BIT_32(idxHstReg);
8133
8134 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp
8135 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved); - included below. */
8136 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8137
8138 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8139 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8140 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8141 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8142 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8143 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8144 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8145 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8146 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8147 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8148 {
8149 case kIemNativeVarKind_Stack:
8150 {
8151 /* Unspill the variable register. */
8152 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8153 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8154 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8155 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8156
8157 if (cbVar == sizeof(RTUINT128U))
8158 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8159 else
8160 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8161 continue;
8162 }
8163
8164 case kIemNativeVarKind_Immediate:
8165 case kIemNativeVarKind_VarRef:
8166 case kIemNativeVarKind_GstRegRef:
8167 /* It is weird to have any of these loaded at this point. */
8168 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8169 continue;
8170
8171 case kIemNativeVarKind_End:
8172 case kIemNativeVarKind_Invalid:
8173 break;
8174 }
8175 AssertFailed();
8176 } while (fHstRegs);
8177 }
8178 return off;
8179}
8180
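/*
 * Illustrative sketch (not actual recompiler output): the save/restore helpers
 * above are meant to bracket a call to a C helper, roughly like below.  The
 * argument handling and the call-emission step shown here are simplifying
 * assumptions for illustration only.
 *
 * @code
 *      uint32_t const fHstGprsToKeep = RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG); // regs holding call args
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstGprsToKeep);
 *      // ... load the argument registers and emit the actual helper call here ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstGprsToKeep);
 * @endcode
 */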
8181
8182/**
8183 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8184 *
8185 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8186 *
8187 * ASSUMES that @a idxVar is valid and unpacked.
8188 */
8189DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8190{
8191 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8192 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8193 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8194 {
8195 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8196 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8197 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8198 Assert(cSlots > 0);
8199 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8200 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8201 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8202 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8203 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8204 }
8205 else
8206 Assert(idxStackSlot == UINT8_MAX);
8207}
8208
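/*
 * Worked example of the slot math above (derived from the code, no new behaviour):
 * a 32 byte variable (RTUINT256U) gives cSlots = (32 + 7) / 8 = 4 and thus
 * fAllocMask = RT_BIT_32(4) - 1 = 0xf; freeing it at idxStackSlot = 6 then clears
 * bits 6 thru 9 (mask 0x3c0) in pReNative->Core.bmStack.
 */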
8209
8210/**
8211 * Worker that frees a single variable.
8212 *
8213 * ASSUMES that @a idxVar is valid and unpacked.
8214 */
8215DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8216{
8217 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8218 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8219 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8220
8221 /* Free the host register first if any assigned. */
8222 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8223 if (idxHstReg != UINT8_MAX)
8224 {
8225 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8226 {
8227 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8228 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8229 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8230 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8231 }
8232 else
8233 {
8234 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8235 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8236 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8237 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8238 }
8239 }
8240
8241 /* Free argument mapping. */
8242 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8243 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8244 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8245
8246 /* Free the stack slots. */
8247 iemNativeVarFreeStackSlots(pReNative, idxVar);
8248
8249 /* Free the actual variable. */
8250 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8251 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8252}
8253
8254
8255/**
8256 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8257 */
8258DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8259{
8260 while (bmVars != 0)
8261 {
8262 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8263 bmVars &= ~RT_BIT_32(idxVar);
8264
8265#if 1 /** @todo optimize by simplifying this later... */
8266 iemNativeVarFreeOneWorker(pReNative, idxVar);
8267#else
8268 /* Only need to free the host register, the rest is done as bulk updates below. */
8269 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8270 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8271 {
8272 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8273 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8274 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8275 }
8276#endif
8277 }
8278#if 0 /** @todo optimize by simplifying this later... */
8279 pReNative->Core.bmVars = 0;
8280 pReNative->Core.bmStack = 0;
8281 pReNative->Core.u64ArgVars = UINT64_MAX;
8282#endif
8283}
8284
8285
8286
8287/*********************************************************************************************************************************
8288* Emitters for IEM_MC_CALL_CIMPL_XXX *
8289*********************************************************************************************************************************/
8290
8291/**
8292 * Emits code to load a reference to the given guest register into @a idxGprDst.
8293 */
8294DECL_HIDDEN_THROW(uint32_t)
8295iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8296 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8297{
8298#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8299 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8300#endif
8301
8302 /*
8303 * Get the offset relative to the CPUMCTX structure.
8304 */
8305 uint32_t offCpumCtx;
8306 switch (enmClass)
8307 {
8308 case kIemNativeGstRegRef_Gpr:
8309 Assert(idxRegInClass < 16);
8310 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8311 break;
8312
8313 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8314 Assert(idxRegInClass < 4);
8315 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8316 break;
8317
8318 case kIemNativeGstRegRef_EFlags:
8319 Assert(idxRegInClass == 0);
8320 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8321 break;
8322
8323 case kIemNativeGstRegRef_MxCsr:
8324 Assert(idxRegInClass == 0);
8325 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8326 break;
8327
8328 case kIemNativeGstRegRef_FpuReg:
8329 Assert(idxRegInClass < 8);
8330 AssertFailed(); /** @todo what kind of indexing? */
8331 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8332 break;
8333
8334 case kIemNativeGstRegRef_MReg:
8335 Assert(idxRegInClass < 8);
8336 AssertFailed(); /** @todo what kind of indexing? */
8337 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8338 break;
8339
8340 case kIemNativeGstRegRef_XReg:
8341 Assert(idxRegInClass < 16);
8342 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8343 break;
8344
8345 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8346 Assert(idxRegInClass == 0);
8347 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8348 break;
8349
8350 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8351 Assert(idxRegInClass == 0);
8352 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8353 break;
8354
8355 default:
8356 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8357 }
8358
8359 /*
8360 * Load the value into the destination register.
8361 */
8362#ifdef RT_ARCH_AMD64
8363 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8364
8365#elif defined(RT_ARCH_ARM64)
8366 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8367 Assert(offCpumCtx < 4096);
8368 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8369
8370#else
8371# error "Port me!"
8372#endif
8373
8374 return off;
8375}
8376
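/*
 * Rough idea of what the emitter above produces (illustrative pseudo-assembly;
 * the destination register is whatever idxGprDst happens to be):
 *
 * @code
 *      ; AMD64: lea  rDst, [pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx]
 *      ; ARM64: add  xDst, x27, #offCpumCtx    ; x27 = IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx < 4096
 * @endcode
 */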
8377
8378/**
8379 * Common code for CIMPL and AIMPL calls.
8380 *
8381 * These are calls that use argument variables and such. They should not be
8382 * confused with internal calls required to implement an MC operation,
8383 * like a TLB load and similar.
8384 *
8385 * Upon return all that is left to do is to load any hidden arguments and
8386 * perform the call. All argument variables are freed.
8387 *
8388 * @returns New code buffer offset; throws VBox status code on error.
8389 * @param pReNative The native recompile state.
8390 * @param off The code buffer offset.
8391 * @param cArgs The total number of arguments (includes hidden
8392 * count).
8393 * @param cHiddenArgs The number of hidden arguments. The hidden
8394 * arguments must not have any variable declared for
8395 * them, whereas all the regular arguments must
8396 * (tstIEMCheckMc ensures this).
8397 * @param fFlushPendingWrites Flag whether to flush pending writes (default true);
8398 * pending writes in call-volatile registers are still flushed even when false.
8399 */
8400DECL_HIDDEN_THROW(uint32_t)
8401iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8402 bool fFlushPendingWrites /*= true*/)
8403{
8404#ifdef VBOX_STRICT
8405 /*
8406 * Assert sanity.
8407 */
8408 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8409 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8410 for (unsigned i = 0; i < cHiddenArgs; i++)
8411 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8412 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8413 {
8414 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8415 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8416 }
8417 iemNativeRegAssertSanity(pReNative);
8418#endif
8419
8420 /* We don't know what the called function makes use of, so flush any pending register writes. */
8421 RT_NOREF(fFlushPendingWrites);
8422#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8423 if (fFlushPendingWrites)
8424#endif
8425 off = iemNativeRegFlushPendingWrites(pReNative, off);
8426
8427 /*
8428 * Before we do anything else, go over variables that are referenced and
8429 * make sure they are not in a register.
8430 */
8431 uint32_t bmVars = pReNative->Core.bmVars;
8432 if (bmVars)
8433 {
8434 do
8435 {
8436 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8437 bmVars &= ~RT_BIT_32(idxVar);
8438
8439 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8440 {
8441 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8442 if (idxRegOld != UINT8_MAX)
8443 {
8444 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8445 {
8446 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
8447
8448 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8449 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8450 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8451 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8452 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8453
8454 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8455 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8456 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8457 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8458 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8459 }
8460 else
8461 {
8462 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8463 Assert( pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U)
8464 || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8465
8466 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8467 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8468 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8469 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8470 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8471 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off,
8472 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8473 else
8474 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off,
8475 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8476
8477 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8478 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8479
8480 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8481 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8482 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8483 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8484 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8485 }
8486 }
8487 }
8488 } while (bmVars != 0);
8489#if 0 //def VBOX_STRICT
8490 iemNativeRegAssertSanity(pReNative);
8491#endif
8492 }
8493
8494 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8495
8496#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8497 /*
8498     * As the very first step, go over the host registers that will be used for arguments
8499     * and make sure they don't shadow anything which needs writing back first.
8500 */
8501 for (uint32_t i = 0; i < cRegArgs; i++)
8502 {
8503 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8504
8505 /* Writeback any dirty guest shadows before using this register. */
8506 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8507 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8508 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8509 }
8510#endif
8511
8512 /*
8513 * First, go over the host registers that will be used for arguments and make
8514 * sure they either hold the desired argument or are free.
8515 */
8516 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8517 {
8518 for (uint32_t i = 0; i < cRegArgs; i++)
8519 {
8520 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8521 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8522 {
8523 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8524 {
8525 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8526 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8527 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8528 Assert(pVar->idxReg == idxArgReg);
8529 uint8_t const uArgNo = pVar->uArgNo;
8530 if (uArgNo == i)
8531 { /* perfect */ }
8532 /* The variable allocator logic should make sure this is impossible,
8533 except for when the return register is used as a parameter (ARM,
8534 but not x86). */
8535#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8536 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8537 {
8538# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8539# error "Implement this"
8540# endif
8541 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8542 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8543 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8544 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8545 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8546 }
8547#endif
8548 else
8549 {
8550 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8551
8552 if (pVar->enmKind == kIemNativeVarKind_Stack)
8553 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8554 else
8555 {
8556 /* just free it, can be reloaded if used again */
8557 pVar->idxReg = UINT8_MAX;
8558 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8559 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8560 }
8561 }
8562 }
8563 else
8564 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8565 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8566 }
8567 }
8568#if 0 //def VBOX_STRICT
8569 iemNativeRegAssertSanity(pReNative);
8570#endif
8571 }
8572
8573 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8574
8575#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8576 /*
8577 * If there are any stack arguments, make sure they are in their place as well.
8578 *
8579 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8580 * the caller) will be loading it later and it must be free (see first loop).
8581 */
8582 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8583 {
8584 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8585 {
8586 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8587 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8588 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8589 {
8590 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8591 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8592 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8593 pVar->idxReg = UINT8_MAX;
8594 }
8595 else
8596 {
8597 /* Use ARG0 as temp for stuff we need registers for. */
8598 switch (pVar->enmKind)
8599 {
8600 case kIemNativeVarKind_Stack:
8601 {
8602 uint8_t const idxStackSlot = pVar->idxStackSlot;
8603 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8604 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8605 iemNativeStackCalcBpDisp(idxStackSlot));
8606 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8607 continue;
8608 }
8609
8610 case kIemNativeVarKind_Immediate:
8611 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8612 continue;
8613
8614 case kIemNativeVarKind_VarRef:
8615 {
8616 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8617 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8618 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8619 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8620 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8621 if (idxRegOther != UINT8_MAX)
8622 {
8623 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8624 {
8625 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8626 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8627 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8628 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8629 }
8630 else
8631 {
8632 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8633 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8634 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8635 else
8636 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8637 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8638 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8639 }
8640 }
8641 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8642 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8643 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8644 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8645 continue;
8646 }
8647
8648 case kIemNativeVarKind_GstRegRef:
8649 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8650 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8651 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8652 continue;
8653
8654 case kIemNativeVarKind_Invalid:
8655 case kIemNativeVarKind_End:
8656 break;
8657 }
8658 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8659 }
8660 }
8661# if 0 //def VBOX_STRICT
8662 iemNativeRegAssertSanity(pReNative);
8663# endif
8664 }
8665#else
8666 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8667#endif
8668
8669 /*
8670 * Make sure the argument variables are loaded into their respective registers.
8671 *
8672 * We can optimize this by ASSUMING that any register allocations are for
8673 * registers that have already been loaded and are ready. The previous step
8674 * saw to that.
8675 */
8676 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8677 {
8678 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8679 {
8680 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8681 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8682 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8683 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8684 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8685 else
8686 {
8687 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8688 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8689 {
8690 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8691 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8692 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8693 | RT_BIT_32(idxArgReg);
8694 pVar->idxReg = idxArgReg;
8695 }
8696 else
8697 {
8698 /* Use ARG0 as temp for stuff we need registers for. */
8699 switch (pVar->enmKind)
8700 {
8701 case kIemNativeVarKind_Stack:
8702 {
8703 uint8_t const idxStackSlot = pVar->idxStackSlot;
8704 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8705 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8706 continue;
8707 }
8708
8709 case kIemNativeVarKind_Immediate:
8710 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8711 continue;
8712
8713 case kIemNativeVarKind_VarRef:
8714 {
8715 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8716 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8717 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8718 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8719 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8720 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8721 if (idxRegOther != UINT8_MAX)
8722 {
8723 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8724 {
8725 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8726 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8727 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8728 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8729 }
8730 else
8731 {
8732 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8733 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8734 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8735 else
8736 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8737 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8738 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8739 }
8740 }
8741 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8742 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8743 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8744 continue;
8745 }
8746
8747 case kIemNativeVarKind_GstRegRef:
8748 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8749 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8750 continue;
8751
8752 case kIemNativeVarKind_Invalid:
8753 case kIemNativeVarKind_End:
8754 break;
8755 }
8756 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8757 }
8758 }
8759 }
8760#if 0 //def VBOX_STRICT
8761 iemNativeRegAssertSanity(pReNative);
8762#endif
8763 }
8764#ifdef VBOX_STRICT
8765 else
8766 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8767 {
8768 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8769 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8770 }
8771#endif
8772
8773 /*
8774 * Free all argument variables (simplified).
8775 * Their lifetime always expires with the call they are for.
8776 */
8777 /** @todo Make the python script check that arguments aren't used after
8778 * IEM_MC_CALL_XXXX. */
8779 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8780 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8781 * an argument value. There is also some FPU stuff. */
8782 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8783 {
8784 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8785 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8786
8787 /* no need to free registers: */
8788 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8789 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8790 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8791 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8792 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8793 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8794
8795 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8796 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8797 iemNativeVarFreeStackSlots(pReNative, idxVar);
8798 }
8799 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8800
8801 /*
8802 * Flush volatile registers as we make the call.
8803 */
8804 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8805
8806 return off;
8807}
8808
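/*
 * Hedged sketch of how a CIMPL emitter typically uses iemNativeEmitCallCommon
 * above (simplified; the hidden-argument loading and the exact emit helpers
 * shown are assumptions for illustration only):
 *
 * @code
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
 *      // Load the hidden arguments (pVCpu and cbInstr) and make the call:
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
 * @endcode
 */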
8809
8810
8811/*********************************************************************************************************************************
8812* TLB Lookup. *
8813*********************************************************************************************************************************/
8814
8815/**
8816 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8817 */
8818DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8819{
8820 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8821 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8822 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8823 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8824 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8825
8826 /* Do the lookup manually. */
8827 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8828 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8829 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8830 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8831 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8832 {
8833 /*
8834 * Check TLB page table level access flags.
8835 */
8836 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8837 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8838 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8839 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8840 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8841 | IEMTLBE_F_PG_UNASSIGNED
8842 | IEMTLBE_F_PT_NO_ACCESSED
8843 | fNoWriteNoDirty | fNoUser);
8844 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8845 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8846 {
8847 /*
8848 * Return the address.
8849 */
8850 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8851 if ((uintptr_t)pbAddr == uResult)
8852 return;
8853 RT_NOREF(cbMem);
8854 AssertFailed();
8855 }
8856 else
8857 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8858 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8859 }
8860 else
8861 AssertFailed();
8862 RT_BREAKPOINT();
8863}
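
/*
 * Bit layout of uSegAndSizeAndAccessAndDisp as decoded above:
 *      bits  0..7  - iSegReg (UINT8_MAX if GCPtr is already flat),
 *      bits  8..15 - cbMem,
 *      bits 16..31 - fAccess,
 *      bits 32..39 - offDisp.
 */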
8864
8865/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8866
8867
8868
8869/*********************************************************************************************************************************
8870* Recompiler Core. *
8871*********************************************************************************************************************************/
8872
8873/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8874static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8875{
8876 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8877 pDis->cbCachedInstr += cbMaxRead;
8878 RT_NOREF(cbMinRead);
8879 return VERR_NO_DATA;
8880}
8881
8882
8883DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8884{
8885 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8886 {
8887#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8888 ENTRY(fLocalForcedActions),
8889 ENTRY(iem.s.rcPassUp),
8890 ENTRY(iem.s.fExec),
8891 ENTRY(iem.s.pbInstrBuf),
8892 ENTRY(iem.s.uInstrBufPc),
8893 ENTRY(iem.s.GCPhysInstrBuf),
8894 ENTRY(iem.s.cbInstrBufTotal),
8895 ENTRY(iem.s.idxTbCurInstr),
8896 ENTRY(iem.s.fSkippingEFlags),
8897#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8898 ENTRY(iem.s.uPcUpdatingDebug),
8899#endif
8900#ifdef VBOX_WITH_STATISTICS
8901 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8902 ENTRY(iem.s.StatNativeTlbHitsForStore),
8903 ENTRY(iem.s.StatNativeTlbHitsForStack),
8904 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8905 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8906 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8907 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8908 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8909#endif
8910 ENTRY(iem.s.DataTlb.uTlbRevision),
8911 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8912 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8913 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8914 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8915 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8916 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8917 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8918 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8919 ENTRY(iem.s.DataTlb.aEntries),
8920 ENTRY(iem.s.CodeTlb.uTlbRevision),
8921 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8922 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8923 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8924 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8925 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8926 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8927 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8928 ENTRY(iem.s.CodeTlb.aEntries),
8929 ENTRY(pVMR3),
8930 ENTRY(cpum.GstCtx.rax),
8931 ENTRY(cpum.GstCtx.ah),
8932 ENTRY(cpum.GstCtx.rcx),
8933 ENTRY(cpum.GstCtx.ch),
8934 ENTRY(cpum.GstCtx.rdx),
8935 ENTRY(cpum.GstCtx.dh),
8936 ENTRY(cpum.GstCtx.rbx),
8937 ENTRY(cpum.GstCtx.bh),
8938 ENTRY(cpum.GstCtx.rsp),
8939 ENTRY(cpum.GstCtx.rbp),
8940 ENTRY(cpum.GstCtx.rsi),
8941 ENTRY(cpum.GstCtx.rdi),
8942 ENTRY(cpum.GstCtx.r8),
8943 ENTRY(cpum.GstCtx.r9),
8944 ENTRY(cpum.GstCtx.r10),
8945 ENTRY(cpum.GstCtx.r11),
8946 ENTRY(cpum.GstCtx.r12),
8947 ENTRY(cpum.GstCtx.r13),
8948 ENTRY(cpum.GstCtx.r14),
8949 ENTRY(cpum.GstCtx.r15),
8950 ENTRY(cpum.GstCtx.es.Sel),
8951 ENTRY(cpum.GstCtx.es.u64Base),
8952 ENTRY(cpum.GstCtx.es.u32Limit),
8953 ENTRY(cpum.GstCtx.es.Attr),
8954 ENTRY(cpum.GstCtx.cs.Sel),
8955 ENTRY(cpum.GstCtx.cs.u64Base),
8956 ENTRY(cpum.GstCtx.cs.u32Limit),
8957 ENTRY(cpum.GstCtx.cs.Attr),
8958 ENTRY(cpum.GstCtx.ss.Sel),
8959 ENTRY(cpum.GstCtx.ss.u64Base),
8960 ENTRY(cpum.GstCtx.ss.u32Limit),
8961 ENTRY(cpum.GstCtx.ss.Attr),
8962 ENTRY(cpum.GstCtx.ds.Sel),
8963 ENTRY(cpum.GstCtx.ds.u64Base),
8964 ENTRY(cpum.GstCtx.ds.u32Limit),
8965 ENTRY(cpum.GstCtx.ds.Attr),
8966 ENTRY(cpum.GstCtx.fs.Sel),
8967 ENTRY(cpum.GstCtx.fs.u64Base),
8968 ENTRY(cpum.GstCtx.fs.u32Limit),
8969 ENTRY(cpum.GstCtx.fs.Attr),
8970 ENTRY(cpum.GstCtx.gs.Sel),
8971 ENTRY(cpum.GstCtx.gs.u64Base),
8972 ENTRY(cpum.GstCtx.gs.u32Limit),
8973 ENTRY(cpum.GstCtx.gs.Attr),
8974 ENTRY(cpum.GstCtx.rip),
8975 ENTRY(cpum.GstCtx.eflags),
8976 ENTRY(cpum.GstCtx.uRipInhibitInt),
8977 ENTRY(cpum.GstCtx.cr0),
8978 ENTRY(cpum.GstCtx.cr4),
8979 ENTRY(cpum.GstCtx.aXcr[0]),
8980 ENTRY(cpum.GstCtx.aXcr[1]),
8981 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8982 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8983 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8984 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8985 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8986 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8987 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8988 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8989 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8990 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8991 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8992 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8993 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8994 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8995 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8996 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8997 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8998 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8999 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
9000 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9001 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9002 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9003 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9004 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9005 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9006 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9007 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9008 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9009 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9010 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9011 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9012 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9013 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9014#undef ENTRY
9015 };
9016#ifdef VBOX_STRICT
9017 static bool s_fOrderChecked = false;
9018 if (!s_fOrderChecked)
9019 {
9020 s_fOrderChecked = true;
9021 uint32_t offPrev = s_aMembers[0].off;
9022 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9023 {
9024 Assert(s_aMembers[i].off > offPrev);
9025 offPrev = s_aMembers[i].off;
9026 }
9027 }
9028#endif
9029
9030 /*
9031 * Binary lookup.
9032 */
9033 unsigned iStart = 0;
9034 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9035 for (;;)
9036 {
9037 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9038 uint32_t const offCur = s_aMembers[iCur].off;
9039 if (off < offCur)
9040 {
9041 if (iCur != iStart)
9042 iEnd = iCur;
9043 else
9044 break;
9045 }
9046 else if (off > offCur)
9047 {
9048 if (iCur + 1 < iEnd)
9049 iStart = iCur + 1;
9050 else
9051 break;
9052 }
9053 else
9054 return s_aMembers[iCur].pszName;
9055 }
9056#ifdef VBOX_WITH_STATISTICS
9057 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9058 return "iem.s.acThreadedFuncStats[iFn]";
9059#endif
9060 return NULL;
9061}
9062
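/*
 * Usage example (follows directly from the table and lookup above): passing
 * RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip) returns "cpum.GstCtx.rip", whereas an
 * offset that sits between table entries yields NULL and thus no annotation.
 */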
9063
9064/**
9065 * Translates a label to a name.
9066 */
9067static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9068{
9069 switch (enmLabel)
9070 {
9071#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9072 STR_CASE_CMN(Invalid);
9073 STR_CASE_CMN(RaiseDe);
9074 STR_CASE_CMN(RaiseUd);
9075 STR_CASE_CMN(RaiseSseRelated);
9076 STR_CASE_CMN(RaiseAvxRelated);
9077 STR_CASE_CMN(RaiseSseAvxFpRelated);
9078 STR_CASE_CMN(RaiseNm);
9079 STR_CASE_CMN(RaiseGp0);
9080 STR_CASE_CMN(RaiseMf);
9081 STR_CASE_CMN(RaiseXf);
9082 STR_CASE_CMN(ObsoleteTb);
9083 STR_CASE_CMN(NeedCsLimChecking);
9084 STR_CASE_CMN(CheckBranchMiss);
9085 STR_CASE_CMN(ReturnSuccess);
9086 STR_CASE_CMN(ReturnBreak);
9087 STR_CASE_CMN(ReturnBreakFF);
9088 STR_CASE_CMN(ReturnWithFlags);
9089 STR_CASE_CMN(ReturnBreakViaLookup);
9090 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9091 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9092 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9093 STR_CASE_CMN(NonZeroRetOrPassUp);
9094#undef STR_CASE_CMN
9095#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9096 STR_CASE_LBL(LoopJumpTarget);
9097 STR_CASE_LBL(If);
9098 STR_CASE_LBL(Else);
9099 STR_CASE_LBL(Endif);
9100 STR_CASE_LBL(CheckIrq);
9101 STR_CASE_LBL(TlbLookup);
9102 STR_CASE_LBL(TlbMiss);
9103 STR_CASE_LBL(TlbDone);
9104 case kIemNativeLabelType_End: break;
9105 }
9106 return NULL;
9107}
9108
9109
9110/** Info for the symbols resolver used when disassembling. */
9111typedef struct IEMNATIVDISASMSYMCTX
9112{
9113 PVMCPU pVCpu;
9114 PCIEMTB pTb;
9115 PCIEMNATIVEPERCHUNKCTX pCtx;
9116#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9117 PCIEMTBDBG pDbgInfo;
9118#endif
9119} IEMNATIVDISASMSYMCTX;
9120typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9121
9122
9123/**
9124 * Resolve address to symbol, if we can.
9125 */
9126static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9127{
9128 PCIEMTB const pTb = pSymCtx->pTb;
9129 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9130 if (offNative <= pTb->Native.cInstructions)
9131 {
9132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9133 /*
9134 * Scan debug info for a matching label.
9135 * Since the debug info should be 100% linear, we can do a binary search here.
9136 */
9137 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9138 if (pDbgInfo)
9139 {
9140 uint32_t const cEntries = pDbgInfo->cEntries;
9141 uint32_t idxEnd = cEntries;
9142 uint32_t idxStart = 0;
9143 for (;;)
9144 {
9145 /* Find a NativeOffset record close to the midpoint. */
9146 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9147 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9148 idx--;
9149 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9150 {
9151 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9152 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9153 idx++;
9154 if (idx >= idxEnd)
9155 break;
9156 }
9157
9158 /* Do the binary searching thing. */
9159 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9160 {
9161 if (idx > idxStart)
9162 idxEnd = idx;
9163 else
9164 break;
9165 }
9166 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9167 {
9168 idx += 1;
9169 if (idx < idxEnd)
9170 idxStart = idx;
9171 else
9172 break;
9173 }
9174 else
9175 {
9176 /* Got a matching offset, scan forward till we hit a label, but
9177 stop when the native offset changes. */
9178 while (++idx < cEntries)
9179 switch (pDbgInfo->aEntries[idx].Gen.uType)
9180 {
9181 case kIemTbDbgEntryType_Label:
9182 {
9183 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9184 const char * const pszName = iemNativeGetLabelName(enmLabel);
9185 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9186 return pszName;
9187 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9188 return pszBuf;
9189 }
9190
9191 case kIemTbDbgEntryType_NativeOffset:
9192 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9193 return NULL;
9194 break;
9195 }
9196 break;
9197 }
9198 }
9199 }
9200#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9201 }
9202 else
9203 {
9204 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9205 if (pChunkCtx)
9206 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9207 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9208 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9209 }
9210 RT_NOREF(pszBuf, cbBuf);
9211 return NULL;
9212}
9213
9214#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9215
9216/**
9217 * @callback_method_impl{FNDISGETSYMBOL}
9218 */
9219static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9220 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9221{
9222 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9223 if (pszSym)
9224 {
9225 *poff = 0;
9226 if (pszSym != pszBuf)
9227 return RTStrCopy(pszBuf, cchBuf, pszSym);
9228 return VINF_SUCCESS;
9229 }
9230 RT_NOREF(pDis, u32Sel);
9231 return VERR_SYMBOL_NOT_FOUND;
9232}
9233
9234
9235/**
9236 * Appends annotations to the disassembled instructions.
9237 */
9238static void
9239iemNativeDisasmAppendAnnotation(char *pszDisBuf, size_t cbDisBuf, PCDISSTATE pDis)
9240{
9241 const char *pszAnnotation = NULL;
9242# if defined(RT_ARCH_AMD64)
9243 if (pDis->pCurInstr->uOpcode == OP_NOP && pDis->cbInstr == 7) /* iemNativeEmitMarker */
9244 {
9245 static const char * const s_apszMarkers[] =
9246 {
9247 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9248 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9249 };
9250
9251 uint32_t const uInfo = *(uint32_t const *)&pDis->Instr.ab[3];
9252 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9253 RTStrPrintf(pszDisBuf, cbDisBuf, "nop ; marker: call #%u to %s (%u args) - %s\n",
9254 uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9255 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9256 uInfo & 0x8000 ? "recompiled" : "todo");
9257 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(s_apszMarkers))
9258 RTStrPrintf(pszDisBuf, cbDisBuf, "nop ; marker: %s\n", s_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9259 else
9260 RTStrPrintf(pszDisBuf, cbDisBuf, "nop ; unknown marker: %#x (%d)\n", uInfo, uInfo);
9261 return;
9262 }
9263
9264 PCDISOPPARAM pMemOp;
9265 if (DISUSE_IS_EFFECTIVE_ADDR(pDis->aParams[0].fUse))
9266 pMemOp = &pDis->aParams[0];
9267 else if (DISUSE_IS_EFFECTIVE_ADDR(pDis->aParams[1].fUse))
9268 pMemOp = &pDis->aParams[1];
9269 else if (DISUSE_IS_EFFECTIVE_ADDR(pDis->aParams[2].fUse))
9270 pMemOp = &pDis->aParams[2];
9271 else
9272 return;
9273 if ( pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9274 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9275 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9276 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9277 else
9278 return;
9279
9280# elif defined(RT_ARCH_ARM64)
9281 /* The memory operand is always number two on ARM. */
9282 if ( pDis->aParams[1].armv8.enmType == kDisArmv8OpParmAddrInGpr
9283 && !(pDis->aParams[1].fUse & (DISUSE_INDEX | DISUSE_PRE_INDEXED | DISUSE_POST_INDEXED))
9284 /** @todo DISUSE_REG_GEN64 is not set: && (pDis->aParams[1].fUse & DISUSE_REG_GEN64) */
9285 && pDis->aParams[1].armv8.Op.Reg.enmRegType == kDisOpParamArmV8RegType_Gpr_64Bit)
9286 {
9287 if (pDis->aParams[1].armv8.Op.Reg.idReg == IEMNATIVE_REG_FIXED_PVMCPU)
9288 pszAnnotation = iemNativeDbgVCpuOffsetToName(pDis->aParams[1].armv8.u.offBase);
9289 else if (pDis->aParams[1].armv8.Op.Reg.idReg == IEMNATIVE_REG_FIXED_PCPUMCTX)
9290 pszAnnotation = iemNativeDbgVCpuOffsetToName(pDis->aParams[1].armv8.u.offBase + RT_UOFFSETOF(VMCPU, cpum.GstCtx));
9291 }
9292 else
9293 return;
9294
9295# else
9296# error "Port me"
9297# endif
9298 if (pszAnnotation)
9299 {
9300 static unsigned const s_offAnnotation = 55;
9301 size_t const cchAnnotation = strlen(pszAnnotation);
9302 size_t cchDis = strlen(pszDisBuf);
9303 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= cbDisBuf)
9304 {
9305 if (cchDis < s_offAnnotation)
9306 {
9307 memset(&pszDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9308 cchDis = s_offAnnotation;
9309 }
9310 pszDisBuf[cchDis++] = ' ';
9311 pszDisBuf[cchDis++] = ';';
9312 pszDisBuf[cchDis++] = ' ';
9313 memcpy(&pszDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9314 }
9315 }
9316}
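
/*
 * Example of the resulting output (register and displacement are made up here):
 * the annotation is appended at column 55, separated by " ; ", e.g. something like
 *
 *      mov rax, qword [rbx+0x123]               ; cpum.GstCtx.rip
 */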
9317
9318#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9319
9320/**
9321 * Annotates an instruction decoded by the capstone disassembler.
9322 */
9323static const char *
9324iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9325{
9326# if defined(RT_ARCH_ARM64)
9327 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9328 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9329 {
9330 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9331 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9332 char const *psz = strchr(pInstr->op_str, '[');
9333 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9334 {
9335 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9336 int32_t off = -1;
9337 psz += 4;
9338 if (*psz == ']')
9339 off = 0;
9340 else if (*psz == ',')
9341 {
9342 psz = RTStrStripL(psz + 1);
9343 if (*psz == '#')
9344 off = RTStrToInt32(&psz[1]);
9345 /** @todo deal with index registers and LSL as well... */
9346 }
9347 if (off >= 0)
9348 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9349 }
9350 }
9351 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9352 {
9353 const char *pszAddr = strchr(pInstr->op_str, '#');
9354 if (pszAddr)
9355 {
9356 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9357 if (uAddr != 0)
9358 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9359 }
9360 }
9361# endif
9362 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9363 return NULL;
9364}
9365#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9366
9367
9368DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9369{
9370 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9371 char szDisBuf[512];
9372 DISSTATE Dis;
9373 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9374 uint32_t const cNative = pTb->Native.cInstructions;
9375 uint32_t offNative = 0;
9376#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9377 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9378#endif
9379 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9380 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9381 : DISCPUMODE_64BIT;
9382#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9383 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9384#else
9385 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9386#endif
9387#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9388 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9389#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9390 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9391#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9392# error "Port me"
9393#else
9394 csh hDisasm = ~(size_t)0;
9395# if defined(RT_ARCH_AMD64)
9396 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9397# elif defined(RT_ARCH_ARM64)
9398 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9399# else
9400# error "Port me"
9401# endif
9402 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9403
9404 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9405 //Assert(rcCs == CS_ERR_OK);
9406#endif
9407
9408 /*
9409 * Print TB info.
9410 */
9411 pHlp->pfnPrintf(pHlp,
9412 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9413 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9414 pTb, pTb->GCPhysPc,
9415#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9416 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9417#else
9418 pTb->FlatPc,
9419#endif
9420 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9421 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9422#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9423 if (pDbgInfo && pDbgInfo->cEntries > 1)
9424 {
9425 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9426
9427 /*
9428 * This disassembly is driven by the debug info which follows the native
9429 * code and indicates where the code for the next guest instruction starts,
9430 * where labels are, and other such things.
9431 */
9432 uint32_t idxThreadedCall = 0;
9433 uint32_t idxGuestInstr = 0;
9434 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9435 uint8_t idxRange = UINT8_MAX;
9436 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9437 uint32_t offRange = 0;
9438 uint32_t offOpcodes = 0;
9439 uint32_t const cbOpcodes = pTb->cbOpcodes;
9440 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9441 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9442 uint32_t iDbgEntry = 1;
9443 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9444
9445 while (offNative < cNative)
9446 {
9447 /* If we're at or have passed the point where the next chunk of debug
9448 info starts, process it. */
9449 if (offDbgNativeNext <= offNative)
9450 {
9451 offDbgNativeNext = UINT32_MAX;
9452 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9453 {
9454 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9455 {
9456 case kIemTbDbgEntryType_GuestInstruction:
9457 {
9458 /* Did the exec flag change? */
9459 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9460 {
9461 pHlp->pfnPrintf(pHlp,
9462 " fExec change %#08x -> %#08x %s\n",
9463 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9464 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9465 szDisBuf, sizeof(szDisBuf)));
9466 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9467 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9468 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9469 : DISCPUMODE_64BIT;
9470 }
9471
9472 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9473 where the compilation was aborted before the opcode was recorded and the actual
9474 instruction was translated to a threaded call. This may happen when we run out
9475 of ranges, or when some complicated interrupts/FFs are found to be pending or
9476 similar. So, we just deal with it here rather than in the compiler code as it
9477 is a lot simpler to do here. */
9478 if ( idxRange == UINT8_MAX
9479 || idxRange >= cRanges
9480 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9481 {
9482 idxRange += 1;
9483 if (idxRange < cRanges)
9484 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9485 else
9486 continue;
9487 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9488 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9489 + (pTb->aRanges[idxRange].idxPhysPage == 0
9490 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9491 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9492 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9493 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9494 pTb->aRanges[idxRange].idxPhysPage);
9495 GCPhysPc += offRange;
9496 }
9497
9498 /* Disassemble the instruction. */
9499 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9500 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9501 uint32_t cbInstr = 1;
9502 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9503 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9504 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9505 if (RT_SUCCESS(rc))
9506 {
9507 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9508 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9509 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9510 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9511
9512 static unsigned const s_offMarker = 55;
9513 static char const s_szMarker[] = " ; <--- guest";
9514 if (cch < s_offMarker)
9515 {
9516 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9517 cch = s_offMarker;
9518 }
9519 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9520 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9521
9522 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9523 }
9524 else
9525 {
9526 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9527 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9528 cbInstr = 1;
9529 }
9530 idxGuestInstr++;
9531 GCPhysPc += cbInstr;
9532 offOpcodes += cbInstr;
9533 offRange += cbInstr;
9534 continue;
9535 }
9536
9537 case kIemTbDbgEntryType_ThreadedCall:
9538 pHlp->pfnPrintf(pHlp,
9539 " Call #%u to %s (%u args) - %s\n",
9540 idxThreadedCall,
9541 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9542 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9543 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9544 idxThreadedCall++;
9545 continue;
9546
9547 case kIemTbDbgEntryType_GuestRegShadowing:
9548 {
9549 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9550 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9551 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9552 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9553 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9554 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9555 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9556 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9557 else
9558 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9559 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9560 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9561 continue;
9562 }
9563
9564 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9565 {
9566 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9567 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9568 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9569 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9570 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9571 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9572 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9573 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9574 else
9575 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9576 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9577 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9578 continue;
9579 }
9580
9581 case kIemTbDbgEntryType_Label:
9582 {
9583 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9584 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9585 {
9586 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9587 ? " ; regs state restored pre-if-block" : "";
9588 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9589 }
9590 else
9591 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9592 continue;
9593 }
9594
9595 case kIemTbDbgEntryType_NativeOffset:
9596 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9597 Assert(offDbgNativeNext >= offNative);
9598 break;
9599
9600# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9601 case kIemTbDbgEntryType_DelayedPcUpdate:
9602 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9603 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9604 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9605 continue;
9606# endif
9607
9608# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9609 case kIemTbDbgEntryType_GuestRegDirty:
9610 {
9611 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9612 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9613 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9614 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9615 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9616 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9617 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9618 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9619 pszGstReg, pszHstReg);
9620 continue;
9621 }
9622
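            /* Note: the '* 25' scaling suggests fGstReg holds a 25-bit slice of the full
               dirty-register mask; the shift below reconstructs its position for display. */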
9623 case kIemTbDbgEntryType_GuestRegWriteback:
9624	                pHlp->pfnPrintf(pHlp, "  Writing dirty %s registers (gst %#RX64)\n",
9625 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9626 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9627 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9628 continue;
9629# endif
9630
9631# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9632 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9633 {
9634 const char *pszOp = "!unknown!";
9635 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9636 {
9637 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9638 case kIemNativePostponedEflOp_Invalid: break;
9639 case kIemNativePostponedEflOp_End: break;
9640 }
9641 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9642 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9643 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9644 continue;
9645 }
9646# endif
9647 default:
9648 AssertFailed();
9649 continue;
9650 }
9651 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9652 iDbgEntry++;
9653 break;
9654 }
9655 }
9656
9657 /*
9658 * Disassemble the next native instruction.
9659 */
9660 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9661# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9662 uint32_t cbInstr = sizeof(paNative[0]);
9663 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9664 if (RT_SUCCESS(rc))
9665 {
9666# ifdef RT_ARCH_AMD64
9667 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9668 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9669 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9670 iemNativeDisasmGetSymbolCb, &SymCtx);
9671# elif defined(RT_ARCH_ARM64)
9672 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9673 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9674 iemNativeDisasmGetSymbolCb, &SymCtx);
9675# else
9676# error "Port me"
9677# endif
9678 iemNativeDisasmAppendAnnotation(szDisBuf, sizeof(szDisBuf), &Dis);
9679 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9680 }
9681 else
9682 {
9683# if defined(RT_ARCH_AMD64)
9684 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9685 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9686# elif defined(RT_ARCH_ARM64)
9687 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9688# else
9689# error "Port me"
9690# endif
9691 cbInstr = sizeof(paNative[0]);
9692 }
9693 offNative += cbInstr / sizeof(paNative[0]);
9694
9695# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9696 cs_insn *pInstr;
9697 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9698 (uintptr_t)pNativeCur, 1, &pInstr);
9699 if (cInstrs > 0)
9700 {
9701 Assert(cInstrs == 1);
9702 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9703 size_t const cchOp = strlen(pInstr->op_str);
9704# if defined(RT_ARCH_AMD64)
9705 if (pszAnnotation)
9706 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9707 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9708 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9709 else
9710 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9711 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9712
9713# else
9714 if (pszAnnotation)
9715 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9716 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9717 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9718 else
9719 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9720 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9721# endif
9722 offNative += pInstr->size / sizeof(*pNativeCur);
9723 cs_free(pInstr, cInstrs);
9724 }
9725 else
9726 {
9727# if defined(RT_ARCH_AMD64)
9728 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9729	                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9730# else
9731 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9732# endif
9733 offNative++;
9734 }
9735# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9736 }
9737 }
9738 else
9739#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9740 {
9741 /*
9742 * No debug info, just disassemble the x86 code and then the native code.
9743 *
9744 * First the guest code:
9745 */
9746 for (unsigned i = 0; i < pTb->cRanges; i++)
9747 {
9748 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9749 + (pTb->aRanges[i].idxPhysPage == 0
9750 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9751 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9752 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9753 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9754 unsigned off = pTb->aRanges[i].offOpcodes;
9755 /** @todo this ain't working when crossing pages! */
9756 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9757 while (off < cbOpcodes)
9758 {
9759 uint32_t cbInstr = 1;
9760 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9761 &pTb->pabOpcodes[off], cbOpcodes - off,
9762 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9763 if (RT_SUCCESS(rc))
9764 {
9765 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9766 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9767 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9768 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9769 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9770 GCPhysPc += cbInstr;
9771 off += cbInstr;
9772 }
9773 else
9774 {
9775 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9776 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9777 break;
9778 }
9779 }
9780 }
9781
9782 /*
9783 * Then the native code:
9784 */
9785 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9786 while (offNative < cNative)
9787 {
9788 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9789#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9790 uint32_t cbInstr = sizeof(paNative[0]);
9791 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9792 if (RT_SUCCESS(rc))
9793 {
9794# ifdef RT_ARCH_AMD64
9795 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9796 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9797 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9798 iemNativeDisasmGetSymbolCb, &SymCtx);
9799# elif defined(RT_ARCH_ARM64)
9800 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9801 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9802 iemNativeDisasmGetSymbolCb, &SymCtx);
9803# else
9804# error "Port me"
9805# endif
9806 iemNativeDisasmAppendAnnotation(szDisBuf, sizeof(szDisBuf), &Dis);
9807 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9808 }
9809 else
9810 {
9811# if defined(RT_ARCH_AMD64)
9812 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9813 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9814# else
9815 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9816# endif
9817 cbInstr = sizeof(paNative[0]);
9818 }
9819 offNative += cbInstr / sizeof(paNative[0]);
9820
9821#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9822 cs_insn *pInstr;
9823 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9824 (uintptr_t)pNativeCur, 1, &pInstr);
9825 if (cInstrs > 0)
9826 {
9827 Assert(cInstrs == 1);
9828 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9829 size_t const cchOp = strlen(pInstr->op_str);
9830# if defined(RT_ARCH_AMD64)
9831 if (pszAnnotation)
9832 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9833 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9834 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9835 else
9836 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9837 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9838
9839# else
9840 if (pszAnnotation)
9841 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9842 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9843 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9844 else
9845 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9846 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9847# endif
9848 offNative += pInstr->size / sizeof(*pNativeCur);
9849 cs_free(pInstr, cInstrs);
9850 }
9851 else
9852 {
9853# if defined(RT_ARCH_AMD64)
9854 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9855	                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9856# else
9857 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9858# endif
9859 offNative++;
9860 }
9861#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9862 }
9863 }
9864
9865#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9866 /* Cleanup. */
9867 cs_close(&hDisasm);
9868#endif
9869}
9870
9871
9872/** Emit alignment padding between labels / functions. */
9873DECL_INLINE_THROW(uint32_t)
9874iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9875{
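    /* The padding is filled with breakpoint instructions (int3 on AMD64, BRK on ARM64)
       so that a stray jump into it traps immediately. */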
9876 if (off & fAlignMask)
9877 {
9878 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9879 while (off & fAlignMask)
9880#if defined(RT_ARCH_AMD64)
9881 pCodeBuf[off++] = 0xcc;
9882#elif defined(RT_ARCH_ARM64)
9883 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9884#else
9885# error "port me"
9886#endif
9887 }
9888 return off;
9889}
9890
9891
9892/**
9893 * Called when a new chunk is allocated, to emit the common per-chunk code.
9894 *
9895 * Allocates a per-chunk context directly from the chunk itself and places the
9896 * common code there.
9897 *
9898 * @returns VBox status code.
9899 * @param pVCpu The cross context virtual CPU structure of the calling
9900 * thread.
9901 * @param idxChunk The index of the chunk being added and requiring a
9902 * common code context.
9903 * @param ppCtx Where to return the pointer to the chunk context start.
9904 */
9905DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9906{
9907 *ppCtx = NULL;
9908
9909 /*
9910 * Allocate a new recompiler state (since we're likely to be called while
9911 * the default one is fully loaded already with a recompiled TB).
9912 *
9913 * This is a bit of overkill, but this isn't a frequently used code path.
9914 */
9915 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9916 AssertReturn(pReNative, VERR_NO_MEMORY);
9917
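    /* The alignment mask is in units of IEMNATIVEINSTR: bytes on AMD64 (16-byte alignment),
       32-bit instruction words on ARM64 (31/4 = 7, giving 32-byte alignment). */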
9918#if defined(RT_ARCH_AMD64)
9919 uint32_t const fAlignMask = 15;
9920#elif defined(RT_ARCH_ARM64)
9921 uint32_t const fAlignMask = 31 / 4;
9922#else
9923# error "port me"
9924#endif
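    /* Code offset of each TB exit label within the common code; filled in as the code is
       emitted below and converted into pointers for the chunk context at the end. */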
9925 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9926 int rc = VINF_SUCCESS;
9927 uint32_t off = 0;
9928
9929 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9930 {
9931 /*
9932 * Emit the epilog code.
9933 */
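        /* The ReturnSuccess exit zeroes the return register and falls straight into the shared
           epilog; offReturnWithStatus marks the epilog entry (status already in the return
           register) that is reused by the non-returning tail labels further down. */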
9934 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9935 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9936 uint32_t const offReturnWithStatus = off;
9937 off = iemNativeEmitCoreEpilog(pReNative, off);
9938
9939 /*
9940         * Generate special jump labels. All of these get a copy of the epilog code.
9941 */
9942 static struct
9943 {
9944 IEMNATIVELABELTYPE enmExitReason;
9945 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9946 } const s_aSpecialWithEpilogs[] =
9947 {
9948 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9949 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9950 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9951 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9952 };
9953 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9954 {
9955 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9956 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9957 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9958 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9959 off = iemNativeEmitCoreEpilog(pReNative, off);
9960 }
9961
9962 /*
9963 * Do what iemNativeEmitReturnBreakViaLookup does.
9964 */
9965 static struct
9966 {
9967 IEMNATIVELABELTYPE enmExitReason;
9968 uintptr_t pfnHelper;
9969 } const s_aViaLookup[] =
9970 {
9971 { kIemNativeLabelType_ReturnBreakViaLookup,
9972 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9973 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9974 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9975 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9976 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9977 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9978 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9979 };
9980 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9981 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9982 {
9983 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9984 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9985 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9986 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9987 }
9988
9989 /*
9990         * Generate simple TB tail labels that just call a helper with a pVCpu
9991         * argument and either return or longjmp/throw a non-zero status.
9992 */
9993 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9994 static struct
9995 {
9996 IEMNATIVELABELTYPE enmExitReason;
9997 bool fWithEpilog;
9998 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9999 } const s_aSimpleTailLabels[] =
10000 {
10001 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
10002 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
10003 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
10004 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
10005 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
10006 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
10007 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
10008 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
10009 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
10010 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
10011 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
10012 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
10013 };
10014 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
10015 {
10016 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
10017 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
10018 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
10019
10020 /* int pfnCallback(PVMCPUCC pVCpu) */
10021 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10022 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
10023
10024 /* If the callback is supposed to return with a status code we inline the epilog
10025 sequence for better speed. Otherwise, if the callback shouldn't return because
10026 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
10027 if (s_aSimpleTailLabels[i].fWithEpilog)
10028 off = iemNativeEmitCoreEpilog(pReNative, off);
10029 else
10030 {
10031#ifdef VBOX_STRICT
10032 off = iemNativeEmitBrk(pReNative, off, 0x2201);
10033#endif
10034 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
10035 }
10036 }
10037
10038
10039#ifdef VBOX_STRICT
10040        /* Make sure we've generated code for all labels. */
10041 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
10042 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10043#endif
10044 }
10045 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10046 {
10047 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
10048 iemNativeTerm(pReNative);
10049 return rc;
10050 }
10051 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10052
10053 /*
10054 * Allocate memory for the context (first) and the common code (last).
10055 */
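    /* A single allocation holds the 64-byte aligned context structure immediately followed by
       the common code; paFinalCommonCodeRx is advanced past the context below. */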
10056 PIEMNATIVEPERCHUNKCTX pCtx;
10057 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
10058 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10059 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10060 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10061 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
10062 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
10063
10064 /*
10065 * Copy over the generated code.
10066 * There should be no fixups or labels defined here.
10067 */
10068 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10069 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10070
10071 Assert(pReNative->cFixups == 0);
10072 Assert(pReNative->cLabels == 0);
10073
10074 /*
10075 * Initialize the context.
10076 */
10077 AssertCompile(kIemNativeLabelType_Invalid == 0);
10078 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10079 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10080 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10081 {
10082 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10083 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10084 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10085 }
10086
10087 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10088
10089 iemNativeTerm(pReNative);
10090 *ppCtx = pCtx;
10091 return VINF_SUCCESS;
10092}
10093
10094
10095/**
10096 * Recompiles the given threaded TB into a native one.
10097 *
10098 * In case of failure the translation block will be returned as-is.
10099 *
10100 * @returns pTb.
10101 * @param pVCpu The cross context virtual CPU structure of the calling
10102 * thread.
10103 * @param   pTb     The threaded translation block to recompile to native.
10104 */
10105IEM_DECL_MSC_GUARD_IGNORE DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10106{
10107#if 0 /* For profiling the native recompiler code. */
10108l_profile_again:
10109#endif
10110 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10111
10112 /*
10113     * The first time through, we allocate the recompiler state and save it;
10114     * all the other times we just reuse the saved one after a quick reset.
10115 */
10116 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10117 if (RT_LIKELY(pReNative))
10118 iemNativeReInit(pReNative, pTb);
10119 else
10120 {
10121 pReNative = iemNativeInit(pVCpu, pTb);
10122 AssertReturn(pReNative, pTb);
10123 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10124 }
10125
10126#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10127 /*
10128 * First do liveness analysis. This is done backwards.
10129 */
10130 {
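        /* The final entry is initialised as 'all unused'; walking backwards, each earlier entry
           is derived from the entry after it and the call in between, i.e. liveness information
           flows from the TB exit back towards the first call. */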
10131 uint32_t idxCall = pTb->Thrd.cCalls;
10132 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10133 { /* likely */ }
10134 else
10135 {
10136 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10137 while (idxCall > cAlloc)
10138 cAlloc *= 2;
10139 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10140 AssertReturn(pvNew, pTb);
10141 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10142 pReNative->cLivenessEntriesAlloc = cAlloc;
10143 }
10144 AssertReturn(idxCall > 0, pTb);
10145 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10146
10147 /* The initial (final) entry. */
10148 idxCall--;
10149 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10150
10151         /* Loop backwards through the calls and fill in the other entries. */
10152 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10153 while (idxCall > 0)
10154 {
10155 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10156 Assert(pfnLiveness);
10157 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10158 pCallEntry--;
10159 idxCall--;
10160 }
10161 }
10162#endif
10163
10164 /*
10165     * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp,
10166     * so we can abort if an error happens.
10167 */
10168 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10169#ifdef LOG_ENABLED
10170 uint32_t const cCallsOrg = cCallsLeft;
10171#endif
10172 uint32_t off = 0;
10173 int rc = VINF_SUCCESS;
10174 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10175 {
10176 /*
10177 * Convert the calls to native code.
10178 */
10179#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10180 int32_t iGstInstr = -1;
10181#endif
10182#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10183 uint32_t cThreadedCalls = 0;
10184 uint32_t cRecompiledCalls = 0;
10185#endif
10186#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10187 uint32_t idxCurCall = 0;
10188#endif
10189 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10190 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10191 while (cCallsLeft-- > 0)
10192 {
10193 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10194#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10195 pReNative->idxCurCall = idxCurCall;
10196#endif
10197
10198#ifdef IEM_WITH_INTRA_TB_JUMPS
10199 /*
10200 * Define label for jump targets (currently only the first entry).
10201 */
10202 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10203 { /* likely */ }
10204 else
10205 {
10206 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10207 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10208 }
10209#endif
10210
10211 /*
10212 * Debug info, assembly markup and statistics.
10213 */
10214#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10215 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10216 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10217#endif
10218#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10219 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10220 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10221 {
10222 if (iGstInstr < (int32_t)pTb->cInstructions)
10223 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10224 else
10225 Assert(iGstInstr == pTb->cInstructions);
10226 iGstInstr = pCallEntry->idxInstr;
10227 }
10228 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10229#endif
10230#if defined(VBOX_STRICT)
10231 off = iemNativeEmitMarker(pReNative, off,
10232 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10233#endif
10234#if defined(VBOX_STRICT)
10235 iemNativeRegAssertSanity(pReNative);
10236#endif
10237#ifdef VBOX_WITH_STATISTICS
10238 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10239#endif
10240
10241#if 0
10242 if ( pTb->GCPhysPc == 0x00000000000c1240
10243 && idxCurCall == 67)
10244 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10245#endif
10246
10247 /*
10248 * Actual work.
10249 */
10250 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10251 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10252 if (pfnRecom) /** @todo stats on this. */
10253 {
10254 off = pfnRecom(pReNative, off, pCallEntry);
10255 STAM_REL_STATS({cRecompiledCalls++;});
10256 }
10257 else
10258 {
10259 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10260 STAM_REL_STATS({cThreadedCalls++;});
10261 }
10262 Assert(off <= pReNative->cInstrBufAlloc);
10263 Assert(pReNative->cCondDepth == 0);
10264
10265#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10266 if (LogIs2Enabled())
10267 {
10268 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10269# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10270 static const char s_achState[] = "CUXI";
10271# else
10272 /* 0123 4567 89ab cdef */
10273 /* CCCC CCCC */
10274 /* WWWW WWWW */
10275 /* RR RR RR RR */
10276 /* P P P P P P P P */
10277 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10278# endif
10279
10280 char szGpr[17];
10281 for (unsigned i = 0; i < 16; i++)
10282 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10283 szGpr[16] = '\0';
10284
10285 char szSegBase[X86_SREG_COUNT + 1];
10286 char szSegLimit[X86_SREG_COUNT + 1];
10287 char szSegAttrib[X86_SREG_COUNT + 1];
10288 char szSegSel[X86_SREG_COUNT + 1];
10289 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10290 {
10291 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10292 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10293 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10294 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10295 }
10296 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10297 = szSegSel[X86_SREG_COUNT] = '\0';
10298
10299 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10300 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10301 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10302                 szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10303
10304 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10305 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10306 }
10307#endif
10308
10309 /*
10310 * Advance.
10311 */
10312 pCallEntry++;
10313#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10314 idxCurCall++;
10315#endif
10316 }
10317
10318 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10319 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10320 if (!cThreadedCalls)
10321 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10322
10323 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10324
10325#ifdef VBOX_WITH_STATISTICS
10326 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10327#endif
10328
10329 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10330 off = iemNativeRegFlushPendingWrites(pReNative, off);
10331
10332 /*
10333 * Jump to the common per-chunk epilog code.
10334 */
10335 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10336 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10337
10338 /*
10339 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10340 */
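        /* Presumably needed because ARM64 conditional branches have a much shorter reach than
           the unconditional B emitted here, whereas AMD64 rel32 jumps can reach the per-chunk
           code directly; each used exit label gets a local branch that the TB exit fixups
           patch later. */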
10341#ifndef RT_ARCH_AMD64
10342 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10343 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10344 AssertCompile(kIemNativeLabelType_Invalid == 0);
10345 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10346 if (fTailLabels)
10347 {
10348 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10349 do
10350 {
10351 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10352 fTailLabels &= ~RT_BIT_64(enmLabel);
10353
10354 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10355 AssertContinue(idxLabel != UINT32_MAX);
10356 iemNativeLabelDefine(pReNative, idxLabel, off);
10357
10358 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10359# ifdef RT_ARCH_ARM64
10360 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10361# else
10362# error "port me"
10363# endif
10364 } while (fTailLabels);
10365 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10366 }
10367#else
10368 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10369#endif
10370 }
10371 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10372 {
10373 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10374 return pTb;
10375 }
10376 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10377 Assert(off <= pReNative->cInstrBufAlloc);
10378
10379 /*
10380     * Make sure all labels have been defined.
10381 */
10382 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10383#ifdef VBOX_STRICT
10384 uint32_t const cLabels = pReNative->cLabels;
10385 for (uint32_t i = 0; i < cLabels; i++)
10386 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10387#endif
10388
10389#if 0 /* For profiling the native recompiler code. */
10390 if (pTb->Thrd.cCalls >= 136)
10391 {
10392 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10393 goto l_profile_again;
10394 }
10395#endif
10396
10397 /*
10398 * Allocate executable memory, copy over the code we've generated.
10399 */
10400 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10401 if (pTbAllocator->pDelayedFreeHead)
10402 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10403
10404 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10405 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10406 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10407 &paFinalInstrBufRx, &pCtx);
10408
10409 AssertReturn(paFinalInstrBuf, pTb);
10410 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10411
10412 /*
10413 * Apply fixups.
10414 */
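    /* Patch each previously emitted branch/call instruction with the now-known offset of its
       target label. */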
10415 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10416 uint32_t const cFixups = pReNative->cFixups;
10417 for (uint32_t i = 0; i < cFixups; i++)
10418 {
10419 Assert(paFixups[i].off < off);
10420 Assert(paFixups[i].idxLabel < cLabels);
10421 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10422 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10423 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10424 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10425 switch (paFixups[i].enmType)
10426 {
10427#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10428 case kIemNativeFixupType_Rel32:
10429 Assert(paFixups[i].off + 4 <= off);
10430 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10431 continue;
10432
10433#elif defined(RT_ARCH_ARM64)
10434 case kIemNativeFixupType_RelImm26At0:
10435 {
10436 Assert(paFixups[i].off < off);
10437 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10438 Assert(offDisp >= -33554432 && offDisp < 33554432);
10439 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10440 continue;
10441 }
10442
10443 case kIemNativeFixupType_RelImm19At5:
10444 {
10445 Assert(paFixups[i].off < off);
10446 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10447 Assert(offDisp >= -262144 && offDisp < 262144);
10448 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10449 continue;
10450 }
10451
10452 case kIemNativeFixupType_RelImm14At5:
10453 {
10454 Assert(paFixups[i].off < off);
10455 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10456 Assert(offDisp >= -8192 && offDisp < 8192);
10457 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10458 continue;
10459 }
10460
10461#endif
10462 case kIemNativeFixupType_Invalid:
10463 case kIemNativeFixupType_End:
10464 break;
10465 }
10466 AssertFailed();
10467 }
10468
10469 /*
10470 * Apply TB exit fixups.
10471 */
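    /* These branches target the common per-chunk code (pCtx->apExitLabels), so the displacements
       are computed against the final read/execute addresses rather than local labels. */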
10472 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10473 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10474 for (uint32_t i = 0; i < cTbExitFixups; i++)
10475 {
10476 Assert(paTbExitFixups[i].off < off);
10477 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10478 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10479
10480#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10481 Assert(paTbExitFixups[i].off + 4 <= off);
10482 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10483 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10484 *Ptr.pi32 = (int32_t)offDisp;
10485
10486#elif defined(RT_ARCH_ARM64)
10487 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10488 Assert(offDisp >= -33554432 && offDisp < 33554432);
10489 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10490
10491#else
10492# error "Port me!"
10493#endif
10494 }
10495
10496 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10497 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10498
10499 /*
10500 * Convert the translation block.
10501 */
10502 RTMemFree(pTb->Thrd.paCalls);
10503 pTb->Native.paInstructions = paFinalInstrBufRx;
10504 pTb->Native.cInstructions = off;
10505 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10506#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10507 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10508    pTb->pDbgInfo       = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10509 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10510#endif
10511
10512 Assert(pTbAllocator->cThreadedTbs > 0);
10513 pTbAllocator->cThreadedTbs -= 1;
10514 pTbAllocator->cNativeTbs += 1;
10515 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10516
10517#ifdef LOG_ENABLED
10518 /*
10519 * Disassemble to the log if enabled.
10520 */
10521 if (LogIs3Enabled())
10522 {
10523 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10524 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10525# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10526 RTLogFlush(NULL);
10527# endif
10528 }
10529#endif
10530 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10531
10532 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10533 return pTb;
10534}
10535