VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@104407

Last change on this file since 104407 was 104407, checked in by vboxsync, 7 months ago

VMM/IEM: Adjusted the TB exit statistics a bit more, adding a few new ones, making more of them release stats that don't go into the TB, and organizing them to try to avoid counting the same exit more than once. bugref:10376 bugref:10653

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 412.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104407 2024-04-23 23:16:04Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
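/* Illustrative only (editorial sketch, not part of the original file): typical use of the
 * levels above with the standard VBox logging macros; the identifiers in these samples are
 * placeholders:
 *     Log2(("Recompiling threaded call #%u (%s)\n", idxCall, pszCallName));  // level 2: per-call details
 *     Log3(("%s\n", pszNativeDisassembly));                                  // level 3: native disassembly
 */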
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
70# include "/opt/local/include/capstone/capstone.h"
71#endif
72
73#include "IEMInline.h"
74#include "IEMThreadedFunctions.h"
75#include "IEMN8veRecompiler.h"
76#include "IEMN8veRecompilerEmit.h"
77#include "IEMN8veRecompilerTlbLookup.h"
78#include "IEMNativeFunctions.h"
79
80
81/*
82 * Narrow down configs here to avoid wasting time on unused configs.
83 * Note! Same checks in IEMAllThrdRecompiler.cpp.
84 */
85
86#ifndef IEM_WITH_CODE_TLB
87# error The code TLB must be enabled for the recompiler.
88#endif
89
90#ifndef IEM_WITH_DATA_TLB
91# error The data TLB must be enabled for the recompiler.
92#endif
93
94#ifndef IEM_WITH_SETJMP
95# error The setjmp approach must be enabled for the recompiler.
96#endif
97
98/** @todo eliminate this clang build hack. */
99#if RT_CLANG_PREREQ(4, 0)
100# pragma GCC diagnostic ignored "-Wunused-function"
101#endif
102
103
104/*********************************************************************************************************************************
105* Internal Functions *
106*********************************************************************************************************************************/
107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
108static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
109#endif
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
112 IEMNATIVEGSTREG enmGstReg, uint32_t off);
113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
130
131
132/**
133 * Used by TB code when it wants to raise a \#DE.
134 */
135IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
136{
137 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
138 iemRaiseDivideErrorJmp(pVCpu);
139#ifndef _MSC_VER
140 return VINF_IEM_RAISED_XCPT; /* not reached */
141#endif
142}
143
144
145/**
146 * Used by TB code when it wants to raise a \#UD.
147 */
148IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
149{
150 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
151 iemRaiseUndefinedOpcodeJmp(pVCpu);
152#ifndef _MSC_VER
153 return VINF_IEM_RAISED_XCPT; /* not reached */
154#endif
155}
156
157
158/**
159 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
160 *
161 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
162 */
163IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
164{
165 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
166 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
167 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
168 iemRaiseUndefinedOpcodeJmp(pVCpu);
169 else
170 iemRaiseDeviceNotAvailableJmp(pVCpu);
171#ifndef _MSC_VER
172 return VINF_IEM_RAISED_XCPT; /* not reached */
173#endif
174}
175
176
177/**
178 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
179 *
180 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
181 */
182IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
183{
184 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
185 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
186 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
187 iemRaiseUndefinedOpcodeJmp(pVCpu);
188 else
189 iemRaiseDeviceNotAvailableJmp(pVCpu);
190#ifndef _MSC_VER
191 return VINF_IEM_RAISED_XCPT; /* not reached */
192#endif
193}
194
195
196/**
197 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
198 *
199 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
200 */
201IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
202{
203 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
204 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
205 iemRaiseSimdFpExceptionJmp(pVCpu);
206 else
207 iemRaiseUndefinedOpcodeJmp(pVCpu);
208#ifndef _MSC_VER
209 return VINF_IEM_RAISED_XCPT; /* not reached */
210#endif
211}
212
213
214/**
215 * Used by TB code when it wants to raise a \#NM.
216 */
217IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
218{
219 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
220 iemRaiseDeviceNotAvailableJmp(pVCpu);
221#ifndef _MSC_VER
222 return VINF_IEM_RAISED_XCPT; /* not reached */
223#endif
224}
225
226
227/**
228 * Used by TB code when it wants to raise a \#GP(0).
229 */
230IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
231{
232 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
233 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
234#ifndef _MSC_VER
235 return VINF_IEM_RAISED_XCPT; /* not reached */
236#endif
237}
238
239
240/**
241 * Used by TB code when it wants to raise a \#MF.
242 */
243IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
244{
245 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
246 iemRaiseMathFaultJmp(pVCpu);
247#ifndef _MSC_VER
248 return VINF_IEM_RAISED_XCPT; /* not reached */
249#endif
250}
251
252
253/**
254 * Used by TB code when it wants to raise a \#XF.
255 */
256IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
257{
258 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
259 iemRaiseSimdFpExceptionJmp(pVCpu);
260#ifndef _MSC_VER
261 return VINF_IEM_RAISED_XCPT; /* not reached */
262#endif
263}
264
265
266/**
267 * Used by TB code when detecting opcode changes.
268 * @see iemThreadeFuncWorkerObsoleteTb
269 */
270IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
271{
272 /* We set fSafeToFree to false because we're being called in the context
273 of a TB callback function, which for native TBs means we cannot release
274 the executable memory until we've returned all the way back to iemTbExec, as
275 that return path goes via the native code generated for the TB. */
276 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
277 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
278 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
279 return VINF_IEM_REEXEC_BREAK;
280}
281
282
283/**
284 * Used by TB code when we need to switch to a TB with CS.LIM checking.
285 */
286IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
287{
288 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
289 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
290 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
291 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
292 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
293 return VINF_IEM_REEXEC_BREAK;
294}
295
296
297/**
298 * Used by TB code when we missed a PC check after a branch.
299 */
300IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
301{
302 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
303 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
304 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
305 pVCpu->iem.s.pbInstrBuf));
306 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
307 return VINF_IEM_REEXEC_BREAK;
308}
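/* Editorial note (assumption, not from the original source): the GCPhysWithOffset value
   logged above reconstructs the guest-physical address of the failed check from the
   linear address (cs.u64Base + rip) and the current instruction buffer mapping described
   by uInstrBufPc/GCPhysInstrBuf. */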
309
310
311
312/*********************************************************************************************************************************
313* Helpers: Segmented memory fetches and stores. *
314*********************************************************************************************************************************/
315
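/* Note (editorial, an assumption about the build configuration): each helper below comes
   in two flavours selected at compile time.  When IEMNATIVE_WITH_TLB_LOOKUP_FETCH, _STORE,
   _PUSH or _POP is defined, the recompiler is assumed to emit an inline TLB lookup for the
   fast path and only call these helpers as the slow-path fallback, hence the '...SafeJmp'
   workers; otherwise the helpers perform the whole access via the regular '...Jmp' workers. */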
316/**
317 * Used by TB code to load unsigned 8-bit data w/ segmentation.
318 */
319IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
320{
321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
322 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
323#else
324 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
325#endif
326}
327
328
329/**
330 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
331 * to 16 bits.
332 */
333IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
334{
335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
336 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
337#else
338 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
339#endif
340}
341
342
343/**
344 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
345 * to 32 bits.
346 */
347IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
348{
349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
350 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
351#else
352 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
353#endif
354}
355
356/**
357 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
358 * to 64 bits.
359 */
360IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
361{
362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
363 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
364#else
365 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
366#endif
367}
368
369
370/**
371 * Used by TB code to load unsigned 16-bit data w/ segmentation.
372 */
373IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
374{
375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
376 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
377#else
378 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
379#endif
380}
381
382
383/**
384 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
385 * to 32 bits.
386 */
387IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
388{
389#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
390 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
391#else
392 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
393#endif
394}
395
396
397/**
398 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
399 * to 64 bits.
400 */
401IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
402{
403#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
404 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
405#else
406 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
407#endif
408}
409
410
411/**
412 * Used by TB code to load unsigned 32-bit data w/ segmentation.
413 */
414IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
415{
416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
417 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
418#else
419 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
420#endif
421}
422
423
424/**
425 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
426 * to 64 bits.
427 */
428IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
429{
430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
431 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
432#else
433 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
434#endif
435}
436
437
438/**
439 * Used by TB code to load unsigned 64-bit data w/ segmentation.
440 */
441IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
442{
443#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
444 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
445#else
446 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
447#endif
448}
449
450
451#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
452/**
453 * Used by TB code to load 128-bit data w/ segmentation.
454 */
455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
456{
457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
458 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
459#else
460 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
461#endif
462}
463
464
465/**
466 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
467 */
468IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
469{
470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
471 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
472#else
473 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
474#endif
475}
476
477
478/**
479 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
480 */
481IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
482{
483#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
484 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
485#else
486 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
487#endif
488}
489
490
491/**
492 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
493 */
494IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
495{
496#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
497 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
498#else
499 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
500#endif
501}
502
503
504/**
505 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
506 */
507IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
508{
509#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
510 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
511#else
512 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
513#endif
514}
515#endif
516
517
518/**
519 * Used by TB code to store unsigned 8-bit data w/ segmentation.
520 */
521IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
522{
523#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
524 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
525#else
526 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
527#endif
528}
529
530
531/**
532 * Used by TB code to store unsigned 16-bit data w/ segmentation.
533 */
534IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
535{
536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
537 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
538#else
539 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
540#endif
541}
542
543
544/**
545 * Used by TB code to store unsigned 32-bit data w/ segmentation.
546 */
547IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
548{
549#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
550 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
551#else
552 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
553#endif
554}
555
556
557/**
558 * Used by TB code to store unsigned 64-bit data w/ segmentation.
559 */
560IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
561{
562#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
563 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
564#else
565 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
566#endif
567}
568
569
570#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
571/**
572 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
573 */
574IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
575{
576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
577 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
578#else
579 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
580#endif
581}
582
583
584/**
585 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
586 */
587IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
588{
589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
590 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
591#else
592 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
593#endif
594}
595
596
597/**
598 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checks.
599 */
600IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
601{
602#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
603 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
604#else
605 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
606#endif
607}
608
609
610/**
611 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
612 */
613IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
614{
615#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
616 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
617#else
618 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
619#endif
620}
621#endif
622
623
624
625/**
626 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
627 */
628IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
629{
630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
631 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
632#else
633 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
634#endif
635}
636
637
638/**
639 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
640 */
641IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
642{
643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
644 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
645#else
646 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
647#endif
648}
649
650
651/**
652 * Used by TB code to store a 32-bit selector value onto a generic stack.
653 *
654 * Intel CPUs don't write a whole dword here, hence the special function.
655 */
656IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
657{
658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
659 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
660#else
661 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
662#endif
663}
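/* Editorial example (assumption, not from the original source): with a 32-bit operand size
   and, say, ESP=0x1000, a 'push fs' on recent Intel CPUs decrements ESP to 0x0FFC but only
   writes the 16-bit selector to 0x0FFC..0x0FFD, leaving 0x0FFE..0x0FFF unmodified; the SReg
   worker above reproduces that partial write. */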
664
665
666/**
667 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
668 */
669IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
670{
671#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
672 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
673#else
674 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
675#endif
676}
677
678
679/**
680 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
681 */
682IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
683{
684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
685 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
686#else
687 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
688#endif
689}
690
691
692/**
693 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
694 */
695IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
696{
697#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
698 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
699#else
700 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
701#endif
702}
703
704
705/**
706 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
707 */
708IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
709{
710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
711 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
712#else
713 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
714#endif
715}
716
717
718
719/*********************************************************************************************************************************
720* Helpers: Flat memory fetches and stores. *
721*********************************************************************************************************************************/
722
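/* Note (editorial, assumption): the flat variants are used when segmentation is a no-op
   (64-bit mode, or 32-bit code with flat segments), so no segment register is applied.
   In the TLB-lookup configuration the shared '...SafeJmp' workers are reused with
   iSegReg = UINT8_MAX, which here denotes 'no segment'. */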
723/**
724 * Used by TB code to load unsigned 8-bit data w/ flat address.
725 * @note Zero extending the value to 64-bit to simplify assembly.
726 */
727IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
728{
729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
730 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
731#else
732 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
733#endif
734}
735
736
737/**
738 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
739 * to 16 bits.
740 * @note Zero extending the value to 64-bit to simplify assembly.
741 */
742IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
746#else
747 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
754 * to 32 bits.
755 * @note Zero extending the value to 64-bit to simplify assembly.
756 */
757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
758{
759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
760 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
761#else
762 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
763#endif
764}
765
766
767/**
768 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
769 * to 64 bits.
770 */
771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
772{
773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
774 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
775#else
776 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
777#endif
778}
779
780
781/**
782 * Used by TB code to load unsigned 16-bit data w/ flat address.
783 * @note Zero extending the value to 64-bit to simplify assembly.
784 */
785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
786{
787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
788 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
789#else
790 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
791#endif
792}
793
794
795/**
796 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
797 * to 32 bits.
798 * @note Zero extending the value to 64-bit to simplify assembly.
799 */
800IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
801{
802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
803 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
804#else
805 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
806#endif
807}
808
809
810/**
811 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
812 * to 64 bits.
813 * @note Zero extending the value to 64-bit to simplify assembly.
814 */
815IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
816{
817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
818 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
819#else
820 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
821#endif
822}
823
824
825/**
826 * Used by TB code to load unsigned 32-bit data w/ flat address.
827 * @note Zero extending the value to 64-bit to simplify assembly.
828 */
829IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
830{
831#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
832 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
833#else
834 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
835#endif
836}
837
838
839/**
840 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
841 * to 64 bits.
842 * @note Zero extending the value to 64-bit to simplify assembly.
843 */
844IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
845{
846#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
847 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
848#else
849 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
850#endif
851}
852
853
854/**
855 * Used by TB code to load unsigned 64-bit data w/ flat address.
856 */
857IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
858{
859#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
860 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
861#else
862 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
863#endif
864}
865
866
867#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
868/**
869 * Used by TB code to load unsigned 128-bit data w/ flat address.
870 */
871IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
872{
873#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
874 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
875#else
876 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
877#endif
878}
879
880
881/**
882 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
883 */
884IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
885{
886#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
887 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
888#else
889 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
890#endif
891}
892
893
894/**
895 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
896 */
897IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
898{
899#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
900 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
901#else
902 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
903#endif
904}
905
906
907/**
908 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
909 */
910IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
911{
912#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
913 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
914#else
915 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
916#endif
917}
918
919
920/**
921 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
922 */
923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
924{
925#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
926 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
927#else
928 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
929#endif
930}
931#endif
932
933
934/**
935 * Used by TB code to store unsigned 8-bit data w/ flat address.
936 */
937IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
938{
939#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
940 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
941#else
942 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
943#endif
944}
945
946
947/**
948 * Used by TB code to store unsigned 16-bit data w/ flat address.
949 */
950IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
951{
952#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
953 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
954#else
955 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
956#endif
957}
958
959
960/**
961 * Used by TB code to store unsigned 32-bit data w/ flat address.
962 */
963IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
964{
965#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
966 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
967#else
968 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
969#endif
970}
971
972
973/**
974 * Used by TB code to store unsigned 64-bit data w/ flat address.
975 */
976IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
977{
978#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
979 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
980#else
981 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
982#endif
983}
984
985
986#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
987/**
988 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
989 */
990IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
991{
992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
993 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
994#else
995 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
996#endif
997}
998
999
1000/**
1001 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
1002 */
1003IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1004{
1005#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1006 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1007#else
1008 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1009#endif
1010}
1011
1012
1013/**
1014 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
1015 */
1016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1017{
1018#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1019 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1020#else
1021 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1022#endif
1023}
1024
1025
1026/**
1027 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
1028 */
1029IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1030{
1031#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1032 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1033#else
1034 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1035#endif
1036}
1037#endif
1038
1039
1040
1041/**
1042 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1043 */
1044IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1045{
1046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1047 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1048#else
1049 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1050#endif
1051}
1052
1053
1054/**
1055 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1056 */
1057IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1058{
1059#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1060 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1061#else
1062 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1063#endif
1064}
1065
1066
1067/**
1068 * Used by TB code to store a segment selector value onto a flat stack.
1069 *
1070 * Intel CPUs don't write a whole dword here, hence the special function.
1071 */
1072IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1073{
1074#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1075 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1076#else
1077 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1078#endif
1079}
1080
1081
1082/**
1083 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1084 */
1085IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1086{
1087#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1088 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1089#else
1090 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1091#endif
1092}
1093
1094
1095/**
1096 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1097 */
1098IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1099{
1100#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1101 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1102#else
1103 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1104#endif
1105}
1106
1107
1108/**
1109 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1110 */
1111IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1112{
1113#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1114 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1115#else
1116 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1117#endif
1118}
1119
1120
1121/**
1122 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1123 */
1124IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1125{
1126#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1127 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1128#else
1129 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1130#endif
1131}
1132
1133
1134
1135/*********************************************************************************************************************************
1136* Helpers: Segmented memory mapping. *
1137*********************************************************************************************************************************/
1138
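/* Note (editorial, assumption): the mapping helpers below return a host pointer to the
   guest data (possibly a bounce buffer) and record how to undo the mapping in *pbUnmapInfo,
   which the TB later hands to the matching unmap helper.  The suffixes denote the access
   mode: Atomic (atomic read-write), Rw (read-write), Wo (write-only) and Ro (read-only). */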
1139/**
1140 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1141 * segmentation.
1142 */
1143IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1144 RTGCPTR GCPtrMem, uint8_t iSegReg))
1145{
1146#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1147 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1148#else
1149 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1150#endif
1151}
1152
1153
1154/**
1155 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1156 */
1157IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1158 RTGCPTR GCPtrMem, uint8_t iSegReg))
1159{
1160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1161 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1162#else
1163 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1164#endif
1165}
1166
1167
1168/**
1169 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1170 */
1171IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1172 RTGCPTR GCPtrMem, uint8_t iSegReg))
1173{
1174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1175 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1176#else
1177 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1178#endif
1179}
1180
1181
1182/**
1183 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1184 */
1185IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1186 RTGCPTR GCPtrMem, uint8_t iSegReg))
1187{
1188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1189 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1190#else
1191 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1192#endif
1193}
1194
1195
1196/**
1197 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1198 * segmentation.
1199 */
1200IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1201 RTGCPTR GCPtrMem, uint8_t iSegReg))
1202{
1203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1204 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1205#else
1206 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1207#endif
1208}
1209
1210
1211/**
1212 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1213 */
1214IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1215 RTGCPTR GCPtrMem, uint8_t iSegReg))
1216{
1217#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1218 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1219#else
1220 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1221#endif
1222}
1223
1224
1225/**
1226 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1227 */
1228IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1229 RTGCPTR GCPtrMem, uint8_t iSegReg))
1230{
1231#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1232 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1233#else
1234 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1235#endif
1236}
1237
1238
1239/**
1240 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1241 */
1242IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1243 RTGCPTR GCPtrMem, uint8_t iSegReg))
1244{
1245#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1246 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1247#else
1248 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1249#endif
1250}
1251
1252
1253/**
1254 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1255 * segmentation.
1256 */
1257IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1258 RTGCPTR GCPtrMem, uint8_t iSegReg))
1259{
1260#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1261 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1262#else
1263 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1264#endif
1265}
1266
1267
1268/**
1269 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1270 */
1271IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1272 RTGCPTR GCPtrMem, uint8_t iSegReg))
1273{
1274#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1275 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1276#else
1277 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1278#endif
1279}
1280
1281
1282/**
1283 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1284 */
1285IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1286 RTGCPTR GCPtrMem, uint8_t iSegReg))
1287{
1288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1289 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1290#else
1291 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1292#endif
1293}
1294
1295
1296/**
1297 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1298 */
1299IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1300 RTGCPTR GCPtrMem, uint8_t iSegReg))
1301{
1302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1303 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1304#else
1305 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1306#endif
1307}
1308
1309
1310/**
1311 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1312 * segmentation.
1313 */
1314IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1315 RTGCPTR GCPtrMem, uint8_t iSegReg))
1316{
1317#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1318 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1319#else
1320 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1321#endif
1322}
1323
1324
1325/**
1326 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1327 */
1328IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1329 RTGCPTR GCPtrMem, uint8_t iSegReg))
1330{
1331#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1332 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1333#else
1334 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1335#endif
1336}
1337
1338
1339/**
1340 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1341 */
1342IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1343 RTGCPTR GCPtrMem, uint8_t iSegReg))
1344{
1345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1346 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1347#else
1348 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1349#endif
1350}
1351
1352
1353/**
1354 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1355 */
1356IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1357 RTGCPTR GCPtrMem, uint8_t iSegReg))
1358{
1359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1360 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1361#else
1362 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1363#endif
1364}
1365
1366
1367/**
1368 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1369 */
1370IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1371 RTGCPTR GCPtrMem, uint8_t iSegReg))
1372{
1373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1374 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1375#else
1376 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1377#endif
1378}
1379
1380
1381/**
1382 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1383 */
1384IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1385 RTGCPTR GCPtrMem, uint8_t iSegReg))
1386{
1387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1388 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1389#else
1390 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1391#endif
1392}
1393
1394
1395/**
1396 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1397 * segmentation.
1398 */
1399IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1400 RTGCPTR GCPtrMem, uint8_t iSegReg))
1401{
1402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1403 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1404#else
1405 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1406#endif
1407}
1408
1409
1410/**
1411 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1412 */
1413IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1414 RTGCPTR GCPtrMem, uint8_t iSegReg))
1415{
1416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1417 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1418#else
1419 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1420#endif
1421}
1422
1423
1424/**
1425 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1426 */
1427IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1428 RTGCPTR GCPtrMem, uint8_t iSegReg))
1429{
1430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1431 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1432#else
1433 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1434#endif
1435}
1436
1437
1438/**
1439 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1440 */
1441IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1442 RTGCPTR GCPtrMem, uint8_t iSegReg))
1443{
1444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1445 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1446#else
1447 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1448#endif
1449}
1450
1451
1452/*********************************************************************************************************************************
1453* Helpers: Flat memory mapping. *
1454*********************************************************************************************************************************/
1455
1456/**
1457 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1458 * address.
1459 */
1460IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1461{
1462#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1463 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1464#else
1465 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1466#endif
1467}
1468
1469
1470/**
1471 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1472 */
1473IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1474{
1475#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1476 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1477#else
1478 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1479#endif
1480}
1481
1482
1483/**
1484 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1485 */
1486IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1487{
1488#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1489 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1490#else
1491 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1492#endif
1493}
1494
1495
1496/**
1497 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1498 */
1499IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1500{
1501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1502 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1503#else
1504 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1505#endif
1506}
1507
1508
1509/**
1510 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1511 * address.
1512 */
1513IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1514{
1515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1516 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1517#else
1518 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1519#endif
1520}
1521
1522
1523/**
1524 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1525 */
1526IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1527{
1528#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1529 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1530#else
1531 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1532#endif
1533}
1534
1535
1536/**
1537 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1538 */
1539IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1540{
1541#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1542 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1543#else
1544 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1545#endif
1546}
1547
1548
1549/**
1550 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1551 */
1552IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1553{
1554#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1555 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1556#else
1557 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1558#endif
1559}
1560
1561
1562/**
1563 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1564 * address.
1565 */
1566IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1567{
1568#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1569 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1570#else
1571 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1572#endif
1573}
1574
1575
1576/**
1577 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1578 */
1579IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1580{
1581#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1582 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1583#else
1584 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1585#endif
1586}
1587
1588
1589/**
1590 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1591 */
1592IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1593{
1594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1595 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1596#else
1597 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1598#endif
1599}
1600
1601
1602/**
1603 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1604 */
1605IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1606{
1607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1608 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1609#else
1610 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1617 * address.
1618 */
1619IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1620{
1621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1622 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1623#else
1624 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1633{
1634#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1635 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1636#else
1637 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1638#endif
1639}
1640
1641
1642/**
1643 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1646{
1647#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1648 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1649#else
1650 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1651#endif
1652}
1653
1654
1655/**
1656 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1657 */
1658IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1659{
1660#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1661 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1662#else
1663 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1664#endif
1665}
1666
1667
1668/**
1669 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1670 */
1671IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1672{
1673#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1674 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1675#else
1676 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1677#endif
1678}
1679
1680
1681/**
1682 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1685{
1686#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1687 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1688#else
1689 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1690#endif
1691}
1692
1693
1694/**
1695 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1696 * address.
1697 */
1698IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1699{
1700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1701 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1702#else
1703 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1704#endif
1705}
1706
1707
1708/**
1709 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1710 */
1711IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1712{
1713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1714 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1715#else
1716 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1717#endif
1718}
1719
1720
1721/**
1722 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1723 */
1724IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1725{
1726#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1727 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1728#else
1729 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1730#endif
1731}
1732
1733
1734/**
1735 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1736 */
1737IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1738{
1739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1740 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1741#else
1742 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1743#endif
1744}
1745
1746
1747/*********************************************************************************************************************************
1748* Helpers: Commit, rollback & unmap *
1749*********************************************************************************************************************************/
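
/*
 * Note: the byte written to *pbUnmapInfo by the mapping helpers above is the
 * token that the generated TB code hands back to the commit helpers below.
 * Rough illustration of the pairing (not actual emitted TB code):
 *
 * @code
 *     uint8_t   bUnmapInfo;
 *     uint32_t *pu32 = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *     *pu32 += 1;
 *     iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 * @endcode
 */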
1750
1751/**
1752 * Used by TB code to commit and unmap an atomic read-write memory mapping.
1753 */
1754IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1755{
1756 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
1757}
1758
1759
1760/**
1761 * Used by TB code to commit and unmap a read-write memory mapping.
1762 */
1763IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1764{
1765 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
1766}
1767
1768
1769/**
1770 * Used by TB code to commit and unmap a write-only memory mapping.
1771 */
1772IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1773{
1774 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
1775}
1776
1777
1778/**
1779 * Used by TB code to commit and unmap a read-only memory mapping.
1780 */
1781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1782{
1783 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
1784}
1785
1786
1787/**
1788 * Reinitializes the native recompiler state.
1789 *
1790 * Called before starting a new recompile job.
1791 */
1792static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1793{
1794 pReNative->cLabels = 0;
1795 pReNative->bmLabelTypes = 0;
1796 pReNative->cFixups = 0;
1797#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1798 pReNative->pDbgInfo->cEntries = 0;
1799 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
1800#endif
1801 pReNative->pTbOrg = pTb;
1802 pReNative->cCondDepth = 0;
1803 pReNative->uCondSeqNo = 0;
1804 pReNative->uCheckIrqSeqNo = 0;
1805 pReNative->uTlbSeqNo = 0;
1806
1807#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1808 pReNative->Core.offPc = 0;
1809 pReNative->Core.cInstrPcUpdateSkipped = 0;
1810#endif
1811#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1812 pReNative->fSimdRaiseXcptChecksEmitted = 0;
1813#endif
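 /* Mark all fixed registers, and any host GPRs beyond what this host actually
    has (when IEMNATIVE_HST_GREG_COUNT < 32), as allocated up front so the
    allocator never hands them out. */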
1814 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1815#if IEMNATIVE_HST_GREG_COUNT < 32
1816 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1817#endif
1818 ;
1819 pReNative->Core.bmHstRegsWithGstShadow = 0;
1820 pReNative->Core.bmGstRegShadows = 0;
1821#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1822 pReNative->Core.bmGstRegShadowDirty = 0;
1823#endif
1824 pReNative->Core.bmVars = 0;
1825 pReNative->Core.bmStack = 0;
1826 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1827 pReNative->Core.u64ArgVars = UINT64_MAX;
1828
1829 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 18);
1830 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1831 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1832 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1833 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1834 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1835 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1836 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
1837 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
1838 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
1839 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
1840 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
1841 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
1842 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
1843 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
1844 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
1845 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
1846 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
1847 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
1848
1849 /* Full host register reinit: */
1850 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1851 {
1852 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1853 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1854 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1855 }
1856
1857 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1858 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1859#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1860 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1861#endif
1862#ifdef IEMNATIVE_REG_FIXED_TMP0
1863 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1864#endif
1865#ifdef IEMNATIVE_REG_FIXED_TMP1
1866 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
1867#endif
1868#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1869 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
1870#endif
1871 );
1872 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1873 {
1874 fRegs &= ~RT_BIT_32(idxReg);
1875 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1876 }
1877
1878 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1879#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1880 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1881#endif
1882#ifdef IEMNATIVE_REG_FIXED_TMP0
1883 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1884#endif
1885#ifdef IEMNATIVE_REG_FIXED_TMP1
1886 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
1887#endif
1888#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1889 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
1890#endif
1891
1892#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1893 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
1894# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
1895 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
1896# endif
1897 ;
1898 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
1899 pReNative->Core.bmGstSimdRegShadows = 0;
1900 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
1901 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
1902
1903 /* Full host register reinit: */
1904 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
1905 {
1906 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
1907 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
1908 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
1909 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
1910 }
1911
1912 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
1913 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1914 {
1915 fRegs &= ~RT_BIT_32(idxReg);
1916 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1917 }
1918
1919#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
1920 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1921#endif
1922
1923#endif
1924
1925 return pReNative;
1926}
1927
1928
1929/**
1930 * Allocates and initializes the native recompiler state.
1931 *
1932 * This is called the first time an EMT wants to recompile something.
1933 *
1934 * @returns Pointer to the new recompiler state.
1935 * @param pVCpu The cross context virtual CPU structure of the calling
1936 * thread.
1937 * @param pTb The TB that's about to be recompiled.
1938 * @thread EMT(pVCpu)
1939 */
1940static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1941{
1942 VMCPU_ASSERT_EMT(pVCpu);
1943
1944 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1945 AssertReturn(pReNative, NULL);
1946
1947 /*
1948 * Try allocate all the buffers and stuff we need.
1949 */
1950 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1951 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1952 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1953#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1954 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1955#endif
1956 if (RT_LIKELY( pReNative->pInstrBuf
1957 && pReNative->paLabels
1958 && pReNative->paFixups)
1959#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1960 && pReNative->pDbgInfo
1961#endif
1962 )
1963 {
1964 /*
1965 * Set the buffer & array sizes on success.
1966 */
1967 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1968 pReNative->cLabelsAlloc = _8K;
1969 pReNative->cFixupsAlloc = _16K;
1970#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1971 pReNative->cDbgInfoAlloc = _16K;
1972#endif
1973
1974 /* Other constant stuff: */
1975 pReNative->pVCpu = pVCpu;
1976
1977 /*
1978 * Done, just need to save it and reinit it.
1979 */
1980 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1981 return iemNativeReInit(pReNative, pTb);
1982 }
1983
1984 /*
1985 * Failed. Cleanup and return.
1986 */
1987 AssertFailed();
1988 RTMemFree(pReNative->pInstrBuf);
1989 RTMemFree(pReNative->paLabels);
1990 RTMemFree(pReNative->paFixups);
1991#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1992 RTMemFree(pReNative->pDbgInfo);
1993#endif
1994 RTMemFree(pReNative);
1995 return NULL;
1996}
1997
1998
1999/**
2000 * Creates a label.
2001 *
2002 * If the label does not yet have a defined position,
2003 * call iemNativeLabelDefine() later to set it.
2004 *
2005 * @returns Label ID. Throws VBox status code on failure, so no need to check
2006 * the return value.
2007 * @param pReNative The native recompile state.
2008 * @param enmType The label type.
2009 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2010 * label is not yet defined (default).
2011 * @param uData Data associated with the label. Only applicable to
2012 * certain types of labels. Default is zero.
2013 */
2014DECL_HIDDEN_THROW(uint32_t)
2015iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2016 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2017{
2018 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2019
2020 /*
2021 * Locate existing label definition.
2022 *
2023 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2024 * and uData is zero.
2025 */
2026 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2027 uint32_t const cLabels = pReNative->cLabels;
2028 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2029#ifndef VBOX_STRICT
2030 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2031 && offWhere == UINT32_MAX
2032 && uData == 0
2033#endif
2034 )
2035 {
2036#ifndef VBOX_STRICT
2037 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2038 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2039 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2040 if (idxLabel < pReNative->cLabels)
2041 return idxLabel;
2042#else
2043 for (uint32_t i = 0; i < cLabels; i++)
2044 if ( paLabels[i].enmType == enmType
2045 && paLabels[i].uData == uData)
2046 {
2047 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2048 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2049 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2050 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2051 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2052 return i;
2053 }
2054 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2055 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2056#endif
2057 }
2058
2059 /*
2060 * Make sure we've got room for another label.
2061 */
2062 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2063 { /* likely */ }
2064 else
2065 {
2066 uint32_t cNew = pReNative->cLabelsAlloc;
2067 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2068 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2069 cNew *= 2;
2070 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2071 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2072 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2073 pReNative->paLabels = paLabels;
2074 pReNative->cLabelsAlloc = cNew;
2075 }
2076
2077 /*
2078 * Define a new label.
2079 */
2080 paLabels[cLabels].off = offWhere;
2081 paLabels[cLabels].enmType = enmType;
2082 paLabels[cLabels].uData = uData;
2083 pReNative->cLabels = cLabels + 1;
2084
2085 Assert((unsigned)enmType < 64);
2086 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2087
2088 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2089 {
2090 Assert(uData == 0);
2091 pReNative->aidxUniqueLabels[enmType] = cLabels;
2092 }
2093
2094 if (offWhere != UINT32_MAX)
2095 {
2096#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2097 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2098 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2099#endif
2100 }
2101 return cLabels;
2102}
2103
2104
2105/**
2106 * Defines the location of an existing label.
2107 *
2108 * @param pReNative The native recompile state.
2109 * @param idxLabel The label to define.
2110 * @param offWhere The position.
2111 */
2112DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2113{
2114 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2115 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2116 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2117 pLabel->off = offWhere;
2118#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2119 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2120 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2121#endif
2122}
2123
2124
2125/**
2126 * Looks up a label.
2127 *
2128 * @returns Label ID if found, UINT32_MAX if not.
2129 */
2130static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2131 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2132{
2133 Assert((unsigned)enmType < 64);
2134 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2135 {
2136 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2137 return pReNative->aidxUniqueLabels[enmType];
2138
2139 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2140 uint32_t const cLabels = pReNative->cLabels;
2141 for (uint32_t i = 0; i < cLabels; i++)
2142 if ( paLabels[i].enmType == enmType
2143 && paLabels[i].uData == uData
2144 && ( paLabels[i].off == offWhere
2145 || offWhere == UINT32_MAX
2146 || paLabels[i].off == UINT32_MAX))
2147 return i;
2148 }
2149 return UINT32_MAX;
2150}
2151
2152
2153/**
2154 * Adds a fixup.
2155 *
2156 * @throws VBox status code (int) on failure.
2157 * @param pReNative The native recompile state.
2158 * @param offWhere The instruction offset of the fixup location.
2159 * @param idxLabel The target label ID for the fixup.
2160 * @param enmType The fixup type.
2161 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2162 */
2163DECL_HIDDEN_THROW(void)
2164iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2165 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2166{
2167 Assert(idxLabel <= UINT16_MAX);
2168 Assert((unsigned)enmType <= UINT8_MAX);
2169#ifdef RT_ARCH_ARM64
2170 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2171 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2172 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2173#endif
2174
2175 /*
2176 * Make sure we've got room for another fixup.
2177 */
2178 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2179 uint32_t const cFixups = pReNative->cFixups;
2180 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2181 { /* likely */ }
2182 else
2183 {
2184 uint32_t cNew = pReNative->cFixupsAlloc;
2185 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2186 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2187 cNew *= 2;
2188 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2189 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2190 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2191 pReNative->paFixups = paFixups;
2192 pReNative->cFixupsAlloc = cNew;
2193 }
2194
2195 /*
2196 * Add the fixup.
2197 */
2198 paFixups[cFixups].off = offWhere;
2199 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2200 paFixups[cFixups].enmType = enmType;
2201 paFixups[cFixups].offAddend = offAddend;
2202 pReNative->cFixups = cFixups + 1;
2203}
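
/*
 * Typical flow for the label and fixup APIs above (illustrative sketch only;
 * enmLabelType, enmFixupType and offFixup are stand-ins for whatever the
 * caller actually uses):
 *
 * @code
 *     // Forward declare the target label.
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *     // ... emit a branch at offFixup and record the fixup against the label ...
 *     iemNativeAddFixup(pReNative, offFixup, idxLabel, enmFixupType);
 *     // ... emit more code ...
 *     // Resolve the forward reference once the target position is known.
 *     iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */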
2204
2205
2206/**
2207 * Slow code path for iemNativeInstrBufEnsure.
2208 */
2209DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2210{
2211 /* Double the buffer size till we meet the request. */
2212 uint32_t cNew = pReNative->cInstrBufAlloc;
2213 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2214 do
2215 cNew *= 2;
2216 while (cNew < off + cInstrReq);
2217
2218 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2219#ifdef RT_ARCH_ARM64
2220 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2221#else
2222 uint32_t const cbMaxInstrBuf = _2M;
2223#endif
2224 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2225
2226 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2227 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2228
2229#ifdef VBOX_STRICT
2230 pReNative->offInstrBufChecked = off + cInstrReq;
2231#endif
2232 pReNative->cInstrBufAlloc = cNew;
2233 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2234}
2235
2236#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2237
2238/**
2239 * Grows the static debug info array used during recompilation.
2240 *
2241 * @returns Pointer to the new debug info block; throws VBox status code on
2242 * failure, so no need to check the return value.
2243 */
2244DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2245{
2246 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2247 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2248 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2249 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2250 pReNative->pDbgInfo = pDbgInfo;
2251 pReNative->cDbgInfoAlloc = cNew;
2252 return pDbgInfo;
2253}
2254
2255
2256/**
2257 * Adds a new debug info uninitialized entry, returning the pointer to it.
2258 */
2259DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2260{
2261 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2262 { /* likely */ }
2263 else
2264 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2265 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2266}
2267
2268
2269/**
2270 * Debug Info: Adds a native offset record, if necessary.
2271 */
2272DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2273{
2274 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2275
2276 /*
2277 * Do we need this one?
2278 */
2279 uint32_t const offPrev = pDbgInfo->offNativeLast;
2280 if (offPrev == off)
2281 return;
2282 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2283
2284 /*
2285 * Add it.
2286 */
2287 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2288 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2289 pEntry->NativeOffset.offNative = off;
2290 pDbgInfo->offNativeLast = off;
2291}
2292
2293
2294/**
2295 * Debug Info: Record info about a label.
2296 */
2297static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2298{
2299 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2300 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2301 pEntry->Label.uUnused = 0;
2302 pEntry->Label.enmLabel = (uint8_t)enmType;
2303 pEntry->Label.uData = uData;
2304}
2305
2306
2307/**
2308 * Debug Info: Record info about a threaded call.
2309 */
2310static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2311{
2312 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2313 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2314 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2315 pEntry->ThreadedCall.uUnused = 0;
2316 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2317}
2318
2319
2320/**
2321 * Debug Info: Record info about a new guest instruction.
2322 */
2323static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2324{
2325 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2326 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2327 pEntry->GuestInstruction.uUnused = 0;
2328 pEntry->GuestInstruction.fExec = fExec;
2329}
2330
2331
2332/**
2333 * Debug Info: Record info about guest register shadowing.
2334 */
2335DECL_HIDDEN_THROW(void)
2336iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2337 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2338{
2339 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2340 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2341 pEntry->GuestRegShadowing.uUnused = 0;
2342 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2343 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2344 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2345#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2346 Assert( idxHstReg != UINT8_MAX
2347 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2348#endif
2349}
2350
2351
2352# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2353/**
2354 * Debug Info: Record info about guest SIMD register shadowing.
2355 */
2356DECL_HIDDEN_THROW(void)
2357iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2358 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2359{
2360 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2361 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2362 pEntry->GuestSimdRegShadowing.uUnused = 0;
2363 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2364 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2365 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2366}
2367# endif
2368
2369
2370# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2371/**
2372 * Debug Info: Record info about delayed RIP updates.
2373 */
2374DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2375{
2376 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2377 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2378 pEntry->DelayedPcUpdate.offPc = offPc;
2379 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2380}
2381# endif
2382
2383# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2384
2385/**
2386 * Debug Info: Record info about a dirty guest register.
2387 */
2388DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2389 uint8_t idxGstReg, uint8_t idxHstReg)
2390{
2391 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2392 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2393 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2394 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2395 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2396}
2397
2398
2399/**
2400 * Debug Info: Record info about a dirty guest register writeback operation.
2401 */
2402DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2403{
2404 unsigned const cBitsGstRegMask = 25;
2405 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2406
2407 /* The first block of 25 bits: */
2408 if (fGstReg & fGstRegMask)
2409 {
2410 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2411 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2412 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2413 pEntry->GuestRegWriteback.cShift = 0;
2414 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2415 fGstReg &= ~(uint64_t)fGstRegMask;
2416 if (!fGstReg)
2417 return;
2418 }
2419
2420 /* The second block of 25 bits: */
2421 fGstReg >>= cBitsGstRegMask;
2422 if (fGstReg & fGstRegMask)
2423 {
2424 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2425 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2426 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2427 pEntry->GuestRegWriteback.cShift = 1;
2428 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2429 fGstReg &= ~(uint64_t)fGstRegMask;
2430 if (!fGstReg)
2431 return;
2432 }
2433
2434 /* The last block with 14 bits: */
2435 fGstReg >>= cBitsGstRegMask;
2436 Assert(fGstReg & fGstRegMask);
2437 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2438 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2439 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2440 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2441 pEntry->GuestRegWriteback.cShift = 2;
2442 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2443}
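
/*
 * Example (illustrative only): a dirty mask with bits 3 and 40 set produces
 * two entries, { cShift=0, fGstReg=RT_BIT_32(3) } and
 * { cShift=1, fGstReg=RT_BIT_32(40 - 25) }, so a consumer can rebuild the
 * original 64-bit mask by shifting each chunk left by cShift * 25 bits.
 */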
2444
2445# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2446
2447#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2448
2449
2450/*********************************************************************************************************************************
2451* Register Allocator *
2452*********************************************************************************************************************************/
2453
2454/**
2455 * Register parameter indexes (indexed by argument number).
2456 */
2457DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2458{
2459 IEMNATIVE_CALL_ARG0_GREG,
2460 IEMNATIVE_CALL_ARG1_GREG,
2461 IEMNATIVE_CALL_ARG2_GREG,
2462 IEMNATIVE_CALL_ARG3_GREG,
2463#if defined(IEMNATIVE_CALL_ARG4_GREG)
2464 IEMNATIVE_CALL_ARG4_GREG,
2465# if defined(IEMNATIVE_CALL_ARG5_GREG)
2466 IEMNATIVE_CALL_ARG5_GREG,
2467# if defined(IEMNATIVE_CALL_ARG6_GREG)
2468 IEMNATIVE_CALL_ARG6_GREG,
2469# if defined(IEMNATIVE_CALL_ARG7_GREG)
2470 IEMNATIVE_CALL_ARG7_GREG,
2471# endif
2472# endif
2473# endif
2474#endif
2475};
2476AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2477
2478/**
2479 * Call register masks indexed by argument count.
2480 */
2481DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2482{
2483 0,
2484 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2485 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2486 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2487 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2488 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2489#if defined(IEMNATIVE_CALL_ARG4_GREG)
2490 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2491 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2492# if defined(IEMNATIVE_CALL_ARG5_GREG)
2493 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2494 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2495# if defined(IEMNATIVE_CALL_ARG6_GREG)
2496 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2497 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2498 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2499# if defined(IEMNATIVE_CALL_ARG7_GREG)
2500 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2501 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2502 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2503# endif
2504# endif
2505# endif
2506#endif
2507};
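
/*
 * For instance, g_aidxIemNativeCallRegs[2] is the host register receiving the
 * third call argument, while g_afIemNativeCallRegs[3] is the mask of all the
 * registers consumed by a call taking three register arguments.
 */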
2508
2509#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2510/**
2511 * BP offset of the stack argument slots.
2512 *
2513 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2514 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2515 */
2516DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2517{
2518 IEMNATIVE_FP_OFF_STACK_ARG0,
2519# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2520 IEMNATIVE_FP_OFF_STACK_ARG1,
2521# endif
2522# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2523 IEMNATIVE_FP_OFF_STACK_ARG2,
2524# endif
2525# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2526 IEMNATIVE_FP_OFF_STACK_ARG3,
2527# endif
2528};
2529AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2530#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2531
2532/**
2533 * Info about shadowed guest register values.
2534 * @see IEMNATIVEGSTREG
2535 */
2536DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2537{
2538#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2539 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2540 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2541 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2542 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2543 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2544 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2545 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2546 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2547 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2548 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2549 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2550 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2551 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2552 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2553 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2554 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2555 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2556 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2557 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2558 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2559 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2560 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2561 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2562 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2563 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2564 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2565 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2566 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2567 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2568 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2569 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2570 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2571 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2572 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2573 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2574 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2575 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2576 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2577 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2578 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2579 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2580 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2581 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2582 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2583 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2584 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2585 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2586 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2587#undef CPUMCTX_OFF_AND_SIZE
2588};
2589AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2590
2591
2592/** Host CPU general purpose register names. */
2593DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2594{
2595#ifdef RT_ARCH_AMD64
2596 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2597#elif defined(RT_ARCH_ARM64)
2598 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2599 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2600#else
2601# error "port me"
2602#endif
2603};
2604
2605
2606#if 0 /* unused */
2607/**
2608 * Tries to locate a suitable register in the given register mask.
2609 *
2610 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2611 * failed.
2612 *
2613 * @returns Host register number on success, returns UINT8_MAX on failure.
2614 */
2615static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2616{
2617 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2618 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2619 if (fRegs)
2620 {
2621 /** @todo pick better here: */
2622 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2623
2624 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2625 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2626 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2627 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2628
2629 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2630 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2631 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2632 return idxReg;
2633 }
2634 return UINT8_MAX;
2635}
2636#endif /* unused */
2637
2638
2639#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2640/**
2641 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2642 *
2643 * @returns New code buffer offset on success, UINT32_MAX on failure.
2644 * @param pReNative The native recompile state.
2645 * @param off The current code buffer position.
2646 * @param enmGstReg The guest register to store to.
2647 * @param idxHstReg The host register to store from.
2648 */
2649DECL_FORCE_INLINE_THROW(uint32_t)
2650iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2651{
2652 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2653 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2654
2655 switch (g_aGstShadowInfo[enmGstReg].cb)
2656 {
2657 case sizeof(uint64_t):
2658 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2659 case sizeof(uint32_t):
2660 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2661 case sizeof(uint16_t):
2662 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2663#if 0 /* not present in the table. */
2664 case sizeof(uint8_t):
2665 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2666#endif
2667 default:
2668 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2669 }
2670}
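
/*
 * For example, flushing a dirty shadow of kIemNativeGstReg_GprFirst + X86_GREG_xAX
 * emits a 64-bit store of the host register to cpum.GstCtx.rax, with the offset
 * and size taken from the g_aGstShadowInfo table above.
 */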
2671
2672
2673/**
2674 * Emits code to flush a pending write of the given guest register if any.
2675 *
2676 * @returns New code buffer offset.
2677 * @param pReNative The native recompile state.
2678 * @param off Current code buffer position.
2679 * @param enmGstReg The guest register to flush.
2680 */
2681DECL_HIDDEN_THROW(uint32_t)
2682iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2683{
2684 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2685
2686 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2687 && enmGstReg <= kIemNativeGstReg_GprLast)
2688 || enmGstReg == kIemNativeGstReg_MxCsr);
2689 Assert( idxHstReg != UINT8_MAX
2690 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2691 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2692 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2693
2694 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2695
2696 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2697 return off;
2698}
2699
2700
2701/**
2702 * Flush the given set of guest registers if marked as dirty.
2703 *
2704 * @returns New code buffer offset.
2705 * @param pReNative The native recompile state.
2706 * @param off Current code buffer position.
2707 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2708 */
2709DECL_HIDDEN_THROW(uint32_t)
2710iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2711{
2712 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2713 if (bmGstRegShadowDirty)
2714 {
2715# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2716 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2717 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2718# endif
2719 do
2720 {
2721 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2722 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2723 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2724 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2725 } while (bmGstRegShadowDirty);
2726 }
2727
2728 return off;
2729}
2730
2731
2732/**
2733 * Flush all shadowed guest registers marked as dirty for the given host register.
2734 *
2735 * @returns New code buffer offset.
2736 * @param pReNative The native recompile state.
2737 * @param off Current code buffer position.
2738 * @param idxHstReg The host register.
2739 *
2740 * @note This doesn't do any unshadowing of guest registers from the host register.
2741 */
2742DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
2743{
2744 /* We need to flush any pending guest register writes this host register shadows. */
2745 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2746 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
2747 {
2748# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2749 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2750 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
2751# endif
2752 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
2753 * likely to only have a single bit set. It'll be in the 0..15 range,
2754 * but still it's 15 unnecessary loops for the last guest register. */
2755
2756 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
2757 do
2758 {
2759 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2760 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2761 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2762 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2763 } while (bmGstRegShadowDirty);
2764 }
2765
2766 return off;
2767}
2768#endif
2769
2770
2771/**
2772 * Locate a register, possibly freeing one up.
2773 *
2774 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2775 * failed.
2776 *
2777 * @returns Host register number on success. Returns UINT8_MAX if no registers
2778 * found; the caller is supposed to deal with this and raise an
2779 * allocation type specific status code (if desired).
2780 *
2781 * @throws VBox status code if we run into trouble spilling a variable or
2782 * recording debug info. Does NOT throw anything if we're out of
2783 * registers, though.
2784 */
2785static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2786 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2787{
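 /*
  * Rough strategy: first try to reuse a free host register that only holds
  * stale guest shadows, using liveness info (when available) to unshadow
  * guest values that won't be read again in this TB; failing that, evict a
  * variable, preferring one that doesn't have to be spilled onto the stack.
  */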
2788 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
2789 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2790 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2791
2792 /*
2793 * Try a freed register that's shadowing a guest register.
2794 */
2795 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2796 if (fRegs)
2797 {
2798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
2799
2800#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
2801 /*
2802 * When we have liveness information, we use it to kick out all shadowed
2803 * guest registers that will not be needed any more in this TB. If we're
2804 * lucky, this may prevent us from ending up here again.
2805 *
2806 * Note! We must consider the previous entry here so we don't free
2807 * anything that the current threaded function requires (current
2808 * entry is produced by the next threaded function).
2809 */
2810 uint32_t const idxCurCall = pReNative->idxCurCall;
2811 if (idxCurCall > 0)
2812 {
2813 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
2814
2815# ifndef IEMLIVENESS_EXTENDED_LAYOUT
2816 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
2817 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
2818 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
2819#else
2820 /* Construct a mask of the registers not in the read or write state.
2821 Note! We could skip writes, if they aren't from us, as this is just
2822 a hack to prevent trashing registers that have just been written
2823 or will be written when we retire the current instruction. */
2824 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
2825 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
2826 & IEMLIVENESSBIT_MASK;
2827#endif
2828 /* Merge EFLAGS. */
2829 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
2830 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
2831 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
2832 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
2833 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
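 /* I.e. the seven EFLAGS liveness bits (Other, CF, PF, AF, ZF, SF, OF) are
    ANDed together so the single kIemNativeGstReg_EFlags bit is only freed
    when none of the individual flag groups are still needed. */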
2834
2835 /* If it matches any shadowed registers. */
2836 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
2837 {
2838#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2839 /* Writeback any dirty shadow registers we are about to unshadow. */
2840 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
2841#endif
2842
2843 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
2844 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
2845 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
2846
2847 /* See if we've got any unshadowed registers we can return now. */
2848 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
2849 if (fUnshadowedRegs)
2850 {
2851 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
2852 return (fPreferVolatile
2853 ? ASMBitFirstSetU32(fUnshadowedRegs)
2854 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2855 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
2856 - 1;
2857 }
2858 }
2859 }
2860#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
2861
2862 unsigned const idxReg = (fPreferVolatile
2863 ? ASMBitFirstSetU32(fRegs)
2864 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2865 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
2866 - 1;
2867
2868 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2869 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2870 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2871 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2872
2873#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2874 /* We need to flush any pending guest register writes this host register shadows. */
2875 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
2876#endif
2877
2878 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2879 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2880 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2881 return idxReg;
2882 }
2883
2884 /*
2885 * Try free up a variable that's in a register.
2886 *
2887 * We do two rounds here, first evacuating variables that don't need to be
2888 * saved on the stack, then in the second round moving things to the stack.
2889 */
2890 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
2891 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2892 {
2893 uint32_t fVars = pReNative->Core.bmVars;
2894 while (fVars)
2895 {
2896 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2897 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2898#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2899 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
2900 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first or we'd spin on this variable forever. */
2901#endif
2902
2903 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2904 && (RT_BIT_32(idxReg) & fRegMask)
2905 && ( iLoop == 0
2906 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2907 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2908 && !pReNative->Core.aVars[idxVar].fRegAcquired)
2909 {
2910 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2911 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2912 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2913 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2914 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2915 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2916#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2917 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2918#endif
2919
2920 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2921 {
2922 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
2923 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2924 }
2925
2926 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2927 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2928
2929 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2930 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2931 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2932 return idxReg;
2933 }
2934 fVars &= ~RT_BIT_32(idxVar);
2935 }
2936 }
2937
2938 return UINT8_MAX;
2939}
2940
2941
2942/**
2943 * Reassigns a variable to a different register specified by the caller.
2944 *
2945 * @returns The new code buffer position.
2946 * @param pReNative The native recompile state.
2947 * @param off The current code buffer position.
2948 * @param idxVar The variable index.
2949 * @param idxRegOld The old host register number.
2950 * @param idxRegNew The new host register number.
2951 * @param pszCaller The caller for logging.
2952 */
2953static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2954 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2955{
2956 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2957 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
2958#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2959 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
2960#endif
2961 RT_NOREF(pszCaller);
2962
2963#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2964 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2965#endif
2966 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2967
2968 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2969#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2970 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2971#endif
2972 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
2973 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2974 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2975
2976 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2977 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2978 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2979 if (fGstRegShadows)
2980 {
2981 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
2982 | RT_BIT_32(idxRegNew);
2983 while (fGstRegShadows)
2984 {
2985 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2986 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2987
2988 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2989 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2990 }
2991 }
2992
2993 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
2994 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2995 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2996 return off;
2997}
2998
2999
3000/**
3001 * Moves a variable to a different register or spills it onto the stack.
3002 *
3003 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3004 * kinds can easily be recreated if needed later.
3005 *
3006 * @returns The new code buffer position.
3007 * @param pReNative The native recompile state.
3008 * @param off The current code buffer position.
3009 * @param idxVar The variable index.
3010 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3011 * call-volatile registers.
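 *
 * A minimal usage sketch; idxVar is assumed to be a stack variable currently
 * living in a call-volatile host register, and the surrounding recompiler
 * context (pReNative, off) is assumed as well:
 * @code
 *      // Move the variable out of the volatile set, or spill it if no
 *      // acceptable register is free (fForbiddenRegs uses its default).
 *      off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
 * @endcode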
3012 */
3013DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3014 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3015{
3016 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3017 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3018 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3019 Assert(!pVar->fRegAcquired);
3020
3021 uint8_t const idxRegOld = pVar->idxReg;
3022 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3023 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3024 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3025 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3026 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3027 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3028 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3029 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3030#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3031 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3032#endif
3033
3034
3035 /** @todo Add statistics on this.*/
3036 /** @todo Implement basic variable liveness analysis (python) so variables
3037 * can be freed immediately once no longer used. Without it we risk
3038 * trashing registers and stack space on dead variables.
3039 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3040
3041 /*
3042 * First try to move it to a different register, as that's cheaper.
3043 */
3044 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3045 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3046 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3047 if (fRegs)
3048 {
3049 /* Avoid using shadow registers, if possible. */
3050 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3051 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3052 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3053 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3054 }
3055
3056 /*
3057 * Otherwise we must spill the register onto the stack.
3058 */
3059 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3060 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3061 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3062 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3063
3064 pVar->idxReg = UINT8_MAX;
3065 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3066 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3067 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3068 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3069 return off;
3070}
3071
3072
3073/**
3074 * Allocates a temporary host general purpose register.
3075 *
3076 * This may emit code to save register content onto the stack in order to free
3077 * up a register.
3078 *
3079 * @returns The host register number; throws VBox status code on failure,
3080 * so no need to check the return value.
3081 * @param pReNative The native recompile state.
3082 * @param poff Pointer to the variable with the code buffer position.
3083 * This will be updated if we need to move a variable from
3084 * register to stack in order to satisfy the request.
3085 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3086 * registers (@c true, default) or the other way around
3087 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
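 *
 * A minimal allocate/emit/free sketch (pReNative and off are assumed to be the
 * usual recompiler state and code buffer position):
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42)); // example value
 *      // ... use idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode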
3088 */
3089DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3090{
3091 /*
3092 * Try to find a completely unused register, preferably a call-volatile one.
3093 */
3094 uint8_t idxReg;
3095 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3096 & ~pReNative->Core.bmHstRegsWithGstShadow
3097 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3098 if (fRegs)
3099 {
3100 if (fPreferVolatile)
3101 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3102 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3103 else
3104 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3105 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3106 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3107 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3108 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3109 }
3110 else
3111 {
3112 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3113 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3114 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3115 }
3116 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3117}
3118
3119
3120/**
3121 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3122 * registers.
3123 *
3124 * @returns The host register number; throws VBox status code on failure,
3125 * so no need to check the return value.
3126 * @param pReNative The native recompile state.
3127 * @param poff Pointer to the variable with the code buffer position.
3128 * This will be updated if we need to move a variable from
3129 * register to stack in order to satisfy the request.
3130 * @param fRegMask Mask of acceptable registers.
3131 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3132 * registers (@c true, default) or the other way around
3133 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
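 *
 * A minimal sketch restricting the pick to non-volatile registers (context is
 * assumed; the mask must not include fixed registers):
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                         IEMNATIVE_HST_GREG_MASK
 *                                                       & ~IEMNATIVE_REG_FIXED_MASK
 *                                                       & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 *      // ... emit code using idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode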
3134 */
3135DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3136 bool fPreferVolatile /*= true*/)
3137{
3138 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3139 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3140
3141 /*
3142 * Try to find a completely unused register, preferably a call-volatile one.
3143 */
3144 uint8_t idxReg;
3145 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3146 & ~pReNative->Core.bmHstRegsWithGstShadow
3147 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3148 & fRegMask;
3149 if (fRegs)
3150 {
3151 if (fPreferVolatile)
3152 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3153 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3154 else
3155 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3156 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3157 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3158 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3159 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3160 }
3161 else
3162 {
3163 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3164 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3165 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3166 }
3167 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3168}
3169
3170
3171/**
3172 * Allocates a temporary register for loading an immediate value into.
3173 *
3174 * This will emit code to load the immediate, unless there happens to be an
3175 * unused register with the value already loaded.
3176 *
3177 * The caller will not modify the returned register, it must be considered
3178 * read-only. Free using iemNativeRegFreeTmpImm.
3179 *
3180 * @returns The host register number; throws VBox status code on failure, so no
3181 * need to check the return value.
3182 * @param pReNative The native recompile state.
3183 * @param poff Pointer to the variable with the code buffer position.
3184 * @param uImm The immediate value that the register must hold upon
3185 * return.
3186 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3187 * registers (@c true, default) or the other way around
3188 * (@c false).
3189 *
3190 * @note Reusing immediate values has not been implemented yet.
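 *
 * A minimal sketch (context assumed; the mask value is illustrative):
 * @code
 *      uint8_t const idxRegMask = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... use idxRegMask strictly as a read-only source operand ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegMask);
 * @endcode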
3191 */
3192DECL_HIDDEN_THROW(uint8_t)
3193iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3194{
3195 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3196 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3197 return idxReg;
3198}
3199
3200
3201/**
3202 * Allocates a temporary host general purpose register for keeping a guest
3203 * register value.
3204 *
3205 * Since we may already have a register holding the guest register value,
3206 * code will be emitted to do the loading if that's not the case. Code may also
3207 * be emitted if we have to free up a register to satisfy the request.
3208 *
3209 * @returns The host register number; throws VBox status code on failure, so no
3210 * need to check the return value.
3211 * @param pReNative The native recompile state.
3212 * @param poff Pointer to the variable with the code buffer
3213 * position. This will be updated if we need to move a
3214 * variable from register to stack in order to satisfy
3215 * the request.
3216 * @param enmGstReg The guest register that is to be updated.
3217 * @param enmIntendedUse How the caller will be using the host register.
3218 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3219 * register is okay (default). The ASSUMPTION here is
3220 * that the caller has already flushed all volatile
3221 * registers, so this is only applied if we allocate a
3222 * new register.
3223 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3224 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
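 *
 * A minimal read-modify-write sketch for the first guest GPR; the choice of
 * register and the emitted modification are illustrative assumptions:
 * @code
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code that updates idxReg here; with delayed writeback the
 *      //     guest shadow is marked dirty and flushed later ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 * @endcode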
3225 */
3226DECL_HIDDEN_THROW(uint8_t)
3227iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3228 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3229 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3230{
3231 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3232#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3233 AssertMsg( fSkipLivenessAssert
3234 || pReNative->idxCurCall == 0
3235 || enmGstReg == kIemNativeGstReg_Pc
3236 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3237 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3238 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3239 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3240 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3241 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3242#endif
3243 RT_NOREF(fSkipLivenessAssert);
3244#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3245 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3246#endif
3247 uint32_t const fRegMask = !fNoVolatileRegs
3248 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3249 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3250
3251 /*
3252 * First check if the guest register value is already in a host register.
3253 */
3254 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3255 {
3256 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3257 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3258 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3259 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3260
3261 /* It's not supposed to be allocated... */
3262 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3263 {
3264 /*
3265 * If the register will trash the guest shadow copy, try to find a
3266 * completely unused register we can use instead. If that fails,
3267 * we need to disassociate the host reg from the guest reg.
3268 */
3269 /** @todo would be nice to know if preserving the register is in any way helpful. */
3270 /* If the purpose is calculations, try to duplicate the register value as
3271 we'll be clobbering the shadow. */
3272 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3273 && ( ~pReNative->Core.bmHstRegs
3274 & ~pReNative->Core.bmHstRegsWithGstShadow
3275 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3276 {
3277 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3278
3279 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3280
3281 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3282 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3283 g_apszIemNativeHstRegNames[idxRegNew]));
3284 idxReg = idxRegNew;
3285 }
3286 /* If the current register matches the restrictions, go ahead and allocate
3287 it for the caller. */
3288 else if (fRegMask & RT_BIT_32(idxReg))
3289 {
3290 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3291 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3292 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3293 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3294 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3295 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3296 else
3297 {
3298 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3299 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3300 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3301 }
3302 }
3303 /* Otherwise, allocate a register that satisfies the caller and transfer
3304 the shadowing if compatible with the intended use. (This basically
3305 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3306 else
3307 {
3308 Assert(fNoVolatileRegs);
3309 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3310 !fNoVolatileRegs
3311 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3312 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3313 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3314 {
3315 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3316 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3317 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3318 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3319 }
3320 else
3321 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3322 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3323 g_apszIemNativeHstRegNames[idxRegNew]));
3324 idxReg = idxRegNew;
3325 }
3326 }
3327 else
3328 {
3329 /*
3330 * Oops. Shadowed guest register already allocated!
3331 *
3332 * Allocate a new register, copy the value and, if updating, the
3333 * guest shadow copy assignment to the new register.
3334 */
3335 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3336 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3337 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3338 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3339
3340 /** @todo share register for readonly access. */
3341 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3342 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3343
3344 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3345 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3346
3347 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3348 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3349 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3350 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3351 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3352 else
3353 {
3354 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3355 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3356 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3357 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3358 }
3359 idxReg = idxRegNew;
3360 }
3361 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3362
3363#ifdef VBOX_STRICT
3364 /* Strict builds: Check that the value is correct. */
3365 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3366#endif
3367
3368#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3369 /** @todo r=aeichner Implement for registers other than GPR as well. */
3370 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3371 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3372 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3373 && enmGstReg <= kIemNativeGstReg_GprLast)
3374 || enmGstReg == kIemNativeGstReg_MxCsr))
3375 {
3376# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3377 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3378 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3379# endif
3380 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3381 }
3382#endif
3383
3384 return idxReg;
3385 }
3386
3387 /*
3388 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3389 */
3390 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3391
3392 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3393 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3394
3395 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3396 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3397 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3398 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3399
3400#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3401 /** @todo r=aeichner Implement for registers other than GPR as well. */
3402 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3403 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3404 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3405 && enmGstReg <= kIemNativeGstReg_GprLast)
3406 || enmGstReg == kIemNativeGstReg_MxCsr))
3407 {
3408# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3409 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3410 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3411# endif
3412 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3413 }
3414#endif
3415
3416 return idxRegNew;
3417}
3418
3419
3420/**
3421 * Allocates a temporary host general purpose register that already holds the
3422 * given guest register value.
3423 *
3424 * The use case for this function is places where the shadowing state cannot be
3425 * modified due to branching and such. This will fail if we don't have a
3426 * current shadow copy handy or if it's incompatible. The only code that will
3427 * be emitted here is value checking code in strict builds.
3428 *
3429 * The intended use can only be readonly!
3430 *
3431 * @returns The host register number, UINT8_MAX if not present.
3432 * @param pReNative The native recompile state.
3433 * @param poff Pointer to the instruction buffer offset.
3434 * Will be updated in strict builds if a register is
3435 * found.
3436 * @param enmGstReg The guest register that is to be read.
3437 * @note In strict builds, this may throw instruction buffer growth failures.
3438 * Non-strict builds will not throw anything.
3439 * @sa iemNativeRegAllocTmpForGuestReg
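 *
 * A minimal sketch showing the UINT8_MAX check the caller must do (context assumed):
 * @code
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxReg != UINT8_MAX)
 *      {
 *          // ... read-only use of idxReg ...
 *          iemNativeRegFreeTmp(pReNative, idxReg);
 *      }
 *      // else: fall back to code that does not need the shadow copy.
 * @endcode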
3440 */
3441DECL_HIDDEN_THROW(uint8_t)
3442iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3443{
3444 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3445#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3446 AssertMsg( pReNative->idxCurCall == 0
3447 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3448 || enmGstReg == kIemNativeGstReg_Pc,
3449 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3450#endif
3451
3452 /*
3453 * First check if the guest register value is already in a host register.
3454 */
3455 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3456 {
3457 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3458 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3459 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3460 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3461
3462 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3463 {
3464 /*
3465 * We only do readonly use here, so easy compared to the other
3466 * variant of this code.
3467 */
3468 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3469 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3470 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3471 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3472 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3473
3474#ifdef VBOX_STRICT
3475 /* Strict builds: Check that the value is correct. */
3476 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3477#else
3478 RT_NOREF(poff);
3479#endif
3480 return idxReg;
3481 }
3482 }
3483
3484 return UINT8_MAX;
3485}
3486
3487
3488/**
3489 * Allocates argument registers for a function call.
3490 *
3491 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3492 * need to check the return value.
3493 * @param pReNative The native recompile state.
3494 * @param off The current code buffer offset.
3495 * @param cArgs The number of arguments the function call takes.
3496 */
3497DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3498{
3499 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3500 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3501 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3502 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3503
3504 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3505 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3506 else if (cArgs == 0)
3507 return true;
3508
3509 /*
3510 * Are we lucky enough that all the registers are free and not shadowing anything?
3511 */
3512 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3513 for (uint32_t i = 0; i < cArgs; i++)
3514 {
3515 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3516 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3517 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3518 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3519 }
3520 /*
3521 * Okay, not lucky so we have to free up the registers.
3522 */
3523 else
3524 for (uint32_t i = 0; i < cArgs; i++)
3525 {
3526 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3527 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3528 {
3529 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3530 {
3531 case kIemNativeWhat_Var:
3532 {
3533 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3534 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3535 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3536 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3537 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3538#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3539 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3540#endif
3541
3542 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3543 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3544 else
3545 {
3546 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3547 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3548 }
3549 break;
3550 }
3551
3552 case kIemNativeWhat_Tmp:
3553 case kIemNativeWhat_Arg:
3554 case kIemNativeWhat_rc:
3555 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3556 default:
3557 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3558 }
3559
3560 }
3561 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3562 {
3563 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3564 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3565 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3566#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3567 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3568#endif
3569 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3570 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3571 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3572 }
3573 else
3574 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3575 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3576 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3577 }
3578 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3579 return true;
3580}
3581
3582
3583DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3584
3585
3586#if 0
3587/**
3588 * Frees a register assignment of any type.
3589 *
3590 * @param pReNative The native recompile state.
3591 * @param idxHstReg The register to free.
3592 *
3593 * @note Does not update variables.
3594 */
3595DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3596{
3597 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3598 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3599 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3600 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3601 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3602 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3603 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3604 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3605 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3606 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3607 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3608 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3609 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3610 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3611
3612 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3613 /* no flushing, right:
3614 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3615 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3616 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3617 */
3618}
3619#endif
3620
3621
3622/**
3623 * Frees a temporary register.
3624 *
3625 * Any shadow copies of guest registers assigned to the host register will not
3626 * be flushed by this operation.
3627 */
3628DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3629{
3630 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3631 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3632 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3633 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3634 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3635}
3636
3637
3638/**
3639 * Frees a temporary immediate register.
3640 *
3641 * It is assumed that the caller has not modified the register, so it still holds
3642 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3643 */
3644DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3645{
3646 iemNativeRegFreeTmp(pReNative, idxHstReg);
3647}
3648
3649
3650/**
3651 * Frees a register assigned to a variable.
3652 *
3653 * The register will be disassociated from the variable.
3654 */
3655DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3656{
3657 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3658 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3659 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3660 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3661 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3662#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3663 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3664#endif
3665
3666 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3667 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3668 if (!fFlushShadows)
3669 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3670 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3671 else
3672 {
3673 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3674 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3675#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3676 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3677#endif
3678 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3679 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3680 uint64_t fGstRegShadows = fGstRegShadowsOld;
3681 while (fGstRegShadows)
3682 {
3683 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3684 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3685
3686 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3687 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3688 }
3689 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3690 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3691 }
3692}
3693
3694
3695#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3696# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
3697/** Host CPU SIMD register names. */
3698DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3699{
3700# ifdef RT_ARCH_AMD64
3701 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3702 # elif defined(RT_ARCH_ARM64)
3703 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3704 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3705# else
3706# error "port me"
3707# endif
3708};
3709# endif
3710
3711
3712/**
3713 * Frees a SIMD register assigned to a variable.
3714 *
3715 * The register will be disassociated from the variable.
3716 */
3717DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3718{
3719 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3720 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3721 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3723 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3724 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3725
3726 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3727 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3728 if (!fFlushShadows)
3729 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3730 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3731 else
3732 {
3733 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3734 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3735 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
3736 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
3737 uint64_t fGstRegShadows = fGstRegShadowsOld;
3738 while (fGstRegShadows)
3739 {
3740 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3741 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3742
3743 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
3744 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
3745 }
3746 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3747 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3748 }
3749}
3750
3751
3752/**
3753 * Reassigns a variable to a different SIMD register specified by the caller.
3754 *
3755 * @returns The new code buffer position.
3756 * @param pReNative The native recompile state.
3757 * @param off The current code buffer position.
3758 * @param idxVar The variable index.
3759 * @param idxRegOld The old host register number.
3760 * @param idxRegNew The new host register number.
3761 * @param pszCaller The caller for logging.
3762 */
3763static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3764 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3765{
3766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3767 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3768 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3769 RT_NOREF(pszCaller);
3770
3771 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3772 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
3773 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
3774
3775 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3776 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3777 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3778
3779 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3780 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
3781 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3782
3783 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
3784 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
3785 else
3786 {
3787 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
3788 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
3789 }
3790
3791 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3792 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3793 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
3794 if (fGstRegShadows)
3795 {
3796 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3797 | RT_BIT_32(idxRegNew);
3798 while (fGstRegShadows)
3799 {
3800 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3801 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3802
3803 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
3804 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
3805 }
3806 }
3807
3808 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3809 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3810 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
3811 return off;
3812}
3813
3814
3815/**
3816 * Moves a variable to a different register or spills it onto the stack.
3817 *
3818 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3819 * kinds can easily be recreated if needed later.
3820 *
3821 * @returns The new code buffer position.
3822 * @param pReNative The native recompile state.
3823 * @param off The current code buffer position.
3824 * @param idxVar The variable index.
3825 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3826 * call-volatile registers.
3827 */
3828DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3829 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
3830{
3831 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3832 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3833 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3834 Assert(!pVar->fRegAcquired);
3835 Assert(!pVar->fSimdReg);
3836
3837 uint8_t const idxRegOld = pVar->idxReg;
3838 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
3839 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
3840 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3841 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
3842 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
3843 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3844 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
3845 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3846 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3847 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3848
3849 /** @todo Add statistics on this.*/
3850 /** @todo Implement basic variable liveness analysis (python) so variables
3851 * can be freed immediately once no longer used. Without it we risk
3852 * trashing registers and stack space on dead variables.
3853 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3854
3855 /*
3856 * First try to move it to a different register, as that's cheaper.
3857 */
3858 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3859 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
3860 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
3861 if (fRegs)
3862 {
3863 /* Avoid using shadow registers, if possible. */
3864 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
3865 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
3866 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3867 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
3868 }
3869
3870 /*
3871 * Otherwise we must spill the register onto the stack.
3872 */
3873 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3874 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3875 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3876
3877 if (pVar->cbVar == sizeof(RTUINT128U))
3878 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3879 else
3880 {
3881 Assert(pVar->cbVar == sizeof(RTUINT256U));
3882 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3883 }
3884
3885 pVar->idxReg = UINT8_MAX;
3886 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3887 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
3888 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3889 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3890 return off;
3891}
3892
3893
3894/**
3895 * Called right before emitting a call instruction to move anything important
3896 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
3897 * optionally freeing argument variables.
3898 *
3899 * @returns New code buffer offset, UINT32_MAX on failure.
3900 * @param pReNative The native recompile state.
3901 * @param off The code buffer offset.
3902 * @param cArgs The number of arguments the function call takes.
3903 * It is presumed that the host register part of these have
3904 * been allocated as such already and won't need moving,
3905 * just freeing.
3906 * @param fKeepVars Mask of variables that should keep their register
3907 * assignments. Caller must take care to handle these.
3908 */
3909DECL_HIDDEN_THROW(uint32_t)
3910iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
3911{
3912 Assert(!cArgs); RT_NOREF(cArgs);
3913
3914 /* fKeepVars will reduce this mask. */
3915 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
3916
3917 /*
3918 * Move anything important out of volatile registers.
3919 */
3920 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
3921#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3922 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
3923#endif
3924 ;
3925
3926 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
3927 if (!fSimdRegsToMove)
3928 { /* likely */ }
3929 else
3930 {
3931 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
3932 while (fSimdRegsToMove != 0)
3933 {
3934 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
3935 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
3936
3937 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
3938 {
3939 case kIemNativeWhat_Var:
3940 {
3941 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
3942 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3943 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3944 Assert(pVar->idxReg == idxSimdReg);
3945 Assert(pVar->fSimdReg);
3946 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
3947 {
3948 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
3949 idxVar, pVar->enmKind, pVar->idxReg));
3950 if (pVar->enmKind != kIemNativeVarKind_Stack)
3951 pVar->idxReg = UINT8_MAX;
3952 else
3953 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
3954 }
3955 else
3956 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
3957 continue;
3958 }
3959
3960 case kIemNativeWhat_Arg:
3961 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
3962 continue;
3963
3964 case kIemNativeWhat_rc:
3965 case kIemNativeWhat_Tmp:
3966 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
3967 continue;
3968
3969 case kIemNativeWhat_FixedReserved:
3970#ifdef RT_ARCH_ARM64
3971 continue; /* On ARM the upper half of the virtual 256-bit register. */
3972#endif
3973
3974 case kIemNativeWhat_FixedTmp:
3975 case kIemNativeWhat_pVCpuFixed:
3976 case kIemNativeWhat_pCtxFixed:
3977 case kIemNativeWhat_PcShadow:
3978 case kIemNativeWhat_Invalid:
3979 case kIemNativeWhat_End:
3980 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3981 }
3982 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3983 }
3984 }
3985
3986 /*
3987 * Do the actual freeing.
3988 */
3989 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
3990 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
3991 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
3992 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
3993
3994 /* If there are guest register shadows in any call-volatile register, we
3995 have to clear the corresponding guest register masks for each register. */
3996 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
3997 if (fHstSimdRegsWithGstShadow)
3998 {
3999 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4000 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4001 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4002 do
4003 {
4004 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4005 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4006
4007 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4008
4009#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4010 /*
4011 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4012 * to call volatile registers).
4013 */
4014 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4015 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4016 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4017#endif
4018 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4019 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4020
4021 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4022 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4023 } while (fHstSimdRegsWithGstShadow != 0);
4024 }
4025
4026 return off;
4027}
4028#endif
4029
4030
4031/**
4032 * Called right before emitting a call instruction to move anything important
4033 * out of call-volatile registers, free and flush the call-volatile registers,
4034 * optionally freeing argument variables.
4035 *
4036 * @returns New code buffer offset, UINT32_MAX on failure.
4037 * @param pReNative The native recompile state.
4038 * @param off The code buffer offset.
4039 * @param cArgs The number of arguments the function call takes.
4040 * It is presumed that the host register part of these have
4041 * been allocated as such already and won't need moving,
4042 * just freeing.
4043 * @param fKeepVars Mask of variables that should keep their register
4044 * assignments. Caller must take care to handle these.
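 *
 * A rough sketch of how this pairs with a helper call and the post-call shadow
 * restore; the argument setup and the call emission itself are elided and the
 * zero cArgs/fHstRegsActiveShadows values are illustrative assumptions:
 * @code
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0); // no register arguments here
 *      // ... emit the actual helper call ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 * @endcode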
4045 */
4046DECL_HIDDEN_THROW(uint32_t)
4047iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4048{
4049 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4050
4051 /* fKeepVars will reduce this mask. */
4052 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4053
4054 /*
4055 * Move anything important out of volatile registers.
4056 */
4057 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4058 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4059 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4060#ifdef IEMNATIVE_REG_FIXED_TMP0
4061 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4062#endif
4063#ifdef IEMNATIVE_REG_FIXED_TMP1
4064 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4065#endif
4066#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4067 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4068#endif
4069 & ~g_afIemNativeCallRegs[cArgs];
4070
4071 fRegsToMove &= pReNative->Core.bmHstRegs;
4072 if (!fRegsToMove)
4073 { /* likely */ }
4074 else
4075 {
4076 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4077 while (fRegsToMove != 0)
4078 {
4079 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4080 fRegsToMove &= ~RT_BIT_32(idxReg);
4081
4082 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4083 {
4084 case kIemNativeWhat_Var:
4085 {
4086 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4088 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4089 Assert(pVar->idxReg == idxReg);
4090#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4091 Assert(!pVar->fSimdReg);
4092#endif
4093 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4094 {
4095 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4096 idxVar, pVar->enmKind, pVar->idxReg));
4097 if (pVar->enmKind != kIemNativeVarKind_Stack)
4098 pVar->idxReg = UINT8_MAX;
4099 else
4100 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4101 }
4102 else
4103 fRegsToFree &= ~RT_BIT_32(idxReg);
4104 continue;
4105 }
4106
4107 case kIemNativeWhat_Arg:
4108 AssertMsgFailed(("What?!?: %u\n", idxReg));
4109 continue;
4110
4111 case kIemNativeWhat_rc:
4112 case kIemNativeWhat_Tmp:
4113 AssertMsgFailed(("Missing free: %u\n", idxReg));
4114 continue;
4115
4116 case kIemNativeWhat_FixedTmp:
4117 case kIemNativeWhat_pVCpuFixed:
4118 case kIemNativeWhat_pCtxFixed:
4119 case kIemNativeWhat_PcShadow:
4120 case kIemNativeWhat_FixedReserved:
4121 case kIemNativeWhat_Invalid:
4122 case kIemNativeWhat_End:
4123 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4124 }
4125 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4126 }
4127 }
4128
4129 /*
4130 * Do the actual freeing.
4131 */
4132 if (pReNative->Core.bmHstRegs & fRegsToFree)
4133 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4134 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4135 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4136
4137 /* If there are guest register shadows in any call-volatile register, we
4138 have to clear the corresponding guest register masks for each register. */
4139 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4140 if (fHstRegsWithGstShadow)
4141 {
4142 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4143 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4144 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4145 do
4146 {
4147 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4148 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4149
4150 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4151
4152#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4153 /*
4154 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4155 * to call volatile registers).
4156 */
4157 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4158 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4159 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4160#endif
4161
4162 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4163 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4164 } while (fHstRegsWithGstShadow != 0);
4165 }
4166
4167#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4168 /* Now for the SIMD registers, no argument support for now. */
4169 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4170#endif
4171
4172 return off;
4173}
4174
4175
4176/**
4177 * Flushes a set of guest register shadow copies.
4178 *
4179 * This is usually done after calling a threaded function or a C-implementation
4180 * of an instruction.
4181 *
4182 * @param pReNative The native recompile state.
4183 * @param fGstRegs Set of guest registers to flush.
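 *
 * A minimal sketch flushing a single guest register shadow (illustrative):
 * @code
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
 * @endcode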
4184 */
4185DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4186{
4187 /*
4188 * Reduce the mask by what's currently shadowed
4189 */
4190 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4191 fGstRegs &= bmGstRegShadowsOld;
4192 if (fGstRegs)
4193 {
4194 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4195 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4196 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4197 if (bmGstRegShadowsNew)
4198 {
4199 /*
4200 * Partial.
4201 */
4202 do
4203 {
4204 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4205 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4206 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4207 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4208 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4209#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4210 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4211#endif
4212
4213 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4214 fGstRegs &= ~fInThisHstReg;
4215 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4216 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4217 if (!fGstRegShadowsNew)
4218 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4219 } while (fGstRegs != 0);
4220 }
4221 else
4222 {
4223 /*
4224 * Clear all.
4225 */
4226 do
4227 {
4228 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4229 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4230 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4231 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4232 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4233#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4234 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4235#endif
4236
4237 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4238 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4239 } while (fGstRegs != 0);
4240 pReNative->Core.bmHstRegsWithGstShadow = 0;
4241 }
4242 }
4243}
4244
4245
4246/**
4247 * Flushes guest register shadow copies held by a set of host registers.
4248 *
4249 * This is used with the TLB lookup code for ensuring that we don't carry on
4250 * with any guest shadows in volatile registers, as these will get corrupted by
4251 * a TLB miss.
4252 *
4253 * @param pReNative The native recompile state.
4254 * @param fHstRegs Set of host registers to flush guest shadows for.
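 *
 * A minimal sketch as it would be used ahead of a TLB lookup that may clobber
 * the call-volatile registers (illustrative):
 * @code
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 * @endcode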
4255 */
4256DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4257{
4258 /*
4259 * Reduce the mask by what's currently shadowed.
4260 */
4261 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4262 fHstRegs &= bmHstRegsWithGstShadowOld;
4263 if (fHstRegs)
4264 {
4265 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4266 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4267 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4268 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4269 if (bmHstRegsWithGstShadowNew)
4270 {
4271 /*
4272 * Partial (likely).
4273 */
4274 uint64_t fGstShadows = 0;
4275 do
4276 {
4277 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4278 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4279 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4280 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4281#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4282 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4283#endif
4284
4285 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4286 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4287 fHstRegs &= ~RT_BIT_32(idxHstReg);
4288 } while (fHstRegs != 0);
4289 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4290 }
4291 else
4292 {
4293 /*
4294 * Clear all.
4295 */
4296 do
4297 {
4298 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4299 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4300 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4301 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4302#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4303 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4304#endif
4305
4306 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4307 fHstRegs &= ~RT_BIT_32(idxHstReg);
4308 } while (fHstRegs != 0);
4309 pReNative->Core.bmGstRegShadows = 0;
4310 }
4311 }
4312}
4313
4314
4315/**
4316 * Restores guest shadow copies in volatile registers.
4317 *
4318 * This is used after calling a helper function (think TLB miss) to restore the
4319 * register state of volatile registers.
4320 *
4321 * @param pReNative The native recompile state.
4322 * @param off The code buffer offset.
4323 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4324 * be active (allocated) w/o asserting. Hack.
4325 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4326 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4327 */
4328DECL_HIDDEN_THROW(uint32_t)
4329iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4330{
4331 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4332 if (fHstRegs)
4333 {
4334 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4335 do
4336 {
4337 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4338
4339 /* It's not fatal if a register is active holding a variable that
4340 shadows a guest register, ASSUMING all pending guest register
4341 writes were flushed prior to the helper call. However, we'll be
4342 emitting duplicate restores, so it wastes code space. */
4343 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4344 RT_NOREF(fHstRegsActiveShadows);
4345
4346 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4347#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4348 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4349#endif
4350 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4351 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4352 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4353
4354 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4355 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4356
4357 fHstRegs &= ~RT_BIT_32(idxHstReg);
4358 } while (fHstRegs != 0);
4359 }
4360 return off;
4361}
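
/*
 * Illustrative sketch (not from the original source): the usage pattern the
 * two functions above are meant for, around a helper call such as a TLB miss
 * path.  It assumes pReNative and off come from the surrounding recompiler
 * context and that all pending guest register writes were already flushed:
 *
 *     iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 *     ...emit the helper call itself...
 *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);   (0 = no host registers excepted)
 */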
4362
4363
4364
4365
4366/*********************************************************************************************************************************
4367* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4368*********************************************************************************************************************************/
4369#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4370
4371/**
4372 * Info about shadowed guest SIMD register values.
4373 * @see IEMNATIVEGSTSIMDREG
4374 */
4375static struct
4376{
4377 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4378 uint32_t offXmm;
4379 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4380 uint32_t offYmm;
4381 /** Name (for logging). */
4382 const char *pszName;
4383} const g_aGstSimdShadowInfo[] =
4384{
4385#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4386 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4387 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4388 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4389 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4390 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4391 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4392 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4393 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4394 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4395 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4396 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4397 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4398 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4399 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4400 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4401 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4402 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4403#undef CPUMCTX_OFF_AND_SIZE
4404};
4405AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4406
4407
4408/**
4409 * Frees a temporary SIMD register.
4410 *
4411 * Any shadow copies of guest registers assigned to the host register will not
4412 * be flushed by this operation.
4413 */
4414DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4415{
4416 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4417 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4418 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4419 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4420 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4421}
4422
4423
4424/**
4425 * Emits code to flush a pending write of the given SIMD register, if any; this also flushes the guest to host SIMD register association.
4426 *
4427 * @returns New code buffer offset.
4428 * @param pReNative The native recompile state.
4429 * @param off Current code buffer position.
4430 * @param enmGstSimdReg The guest SIMD register to flush.
4431 */
4432DECL_HIDDEN_THROW(uint32_t)
4433iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4434{
4435 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4436
4437 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4438 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4439 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4440 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4441
4442 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4443 {
4444 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4445 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4446 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4447 }
4448
4449 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4450 {
4451 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4452 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4453 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4454 }
4455
4456 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4457 return off;
4458}
4459
4460
4461/**
4462 * Flush the given set of guest SIMD registers if marked as dirty.
4463 *
4464 * @returns New code buffer offset.
4465 * @param pReNative The native recompile state.
4466 * @param off Current code buffer position.
4467 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4468 */
4469DECL_HIDDEN_THROW(uint32_t)
4470iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4471{
4472 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4473 & fFlushGstSimdReg;
4474 if (bmGstSimdRegShadowDirty)
4475 {
4476# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4477 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4478 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4479# endif
4480
4481 do
4482 {
4483 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4484 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4485 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4486 } while (bmGstSimdRegShadowDirty);
4487 }
4488
4489 return off;
4490}
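
/*
 * Illustrative sketch (not from the original source): flushing either all
 * dirty guest SIMD registers or just a single one, assuming pReNative and
 * off from the surrounding recompiler context:
 *
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);                              - everything
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1)));  - just ymm1
 */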
4491
4492
4493#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4494/**
4495 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4496 *
4497 * @returns New code buffer offset.
4498 * @param pReNative The native recompile state.
4499 * @param off Current code buffer position.
4500 * @param idxHstSimdReg The host SIMD register.
4501 *
4502 * @note This doesn't do any unshadowing of guest registers from the host register.
4503 */
4504DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4505{
4506 /* We need to flush any pending guest register writes this host register shadows. */
4507 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4508 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4509 if (bmGstSimdRegShadowDirty)
4510 {
4511# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4512 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4513 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4514# endif
4515
4516 do
4517 {
4518 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4519 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4520 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4521 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4522 } while (bmGstSimdRegShadowDirty);
4523 }
4524
4525 return off;
4526}
4527#endif
4528
4529
4530/**
4531 * Locate a register, possibly freeing one up.
4532 *
4533 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4534 * failed.
4535 *
4536 * @returns Host register number on success. Returns UINT8_MAX if no registers
4537 * are found; the caller is supposed to deal with this and raise an
4538 * allocation type specific status code (if desired).
4539 *
4540 * @throws VBox status code if we run into trouble spilling a variable or
4541 * recording debug info. Does NOT throw anything if we're out of
4542 * registers, though.
4543 */
4544static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4545 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4546{
4547 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4548 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4549 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4550
4551 /*
4552 * Try a freed register that's shadowing a guest register.
4553 */
4554 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4555 if (fRegs)
4556 {
4557 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4558
4559#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4560 /*
4561 * When we have liveness information, we use it to kick out all shadowed
4562 * guest registers that will not be needed any more in this TB. If we're
4563 * lucky, this may prevent us from ending up here again.
4564 *
4565 * Note! We must consider the previous entry here so we don't free
4566 * anything that the current threaded function requires (current
4567 * entry is produced by the next threaded function).
4568 */
4569 uint32_t const idxCurCall = pReNative->idxCurCall;
4570 if (idxCurCall > 0)
4571 {
4572 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4573
4574# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4575 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4576 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4577 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4578#else
4579 /* Construct a mask of the registers not in the read or write state.
4580 Note! We could skip writes, if they aren't from us, as this is just
4581 a hack to prevent trashing registers that have just been written
4582 or will be written when we retire the current instruction. */
4583 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4584 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4585 & IEMLIVENESSBIT_MASK;
4586#endif
4587 /* If it matches any shadowed registers. */
4588 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4589 {
4590 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4591 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4592 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4593
4594 /* See if we've got any unshadowed registers we can return now. */
4595 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4596 if (fUnshadowedRegs)
4597 {
4598 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4599 return (fPreferVolatile
4600 ? ASMBitFirstSetU32(fUnshadowedRegs)
4601 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4602 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4603 - 1;
4604 }
4605 }
4606 }
4607#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4608
4609 unsigned const idxReg = (fPreferVolatile
4610 ? ASMBitFirstSetU32(fRegs)
4611 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4612 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4613 - 1;
4614
4615 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4616 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4617 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4618 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4619
4620 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4621 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4622
4623 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4624 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4625 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4626 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4627 return idxReg;
4628 }
4629
4630 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4631
4632 /*
4633 * Try free up a variable that's in a register.
4634 *
4635 * We do two rounds here, first evacuating variables we don't need to be
4636 * saved on the stack, then in the second round move things to the stack.
4637 */
4638 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4639 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4640 {
4641 uint32_t fVars = pReNative->Core.bmVars;
4642 while (fVars)
4643 {
4644 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4645 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4646 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
4647 continue;
4648
4649 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4650 && (RT_BIT_32(idxReg) & fRegMask)
4651 && ( iLoop == 0
4652 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4653 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4654 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4655 {
4656 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4657 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4658 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4659 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4660 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4661 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4662
4663 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4664 {
4665 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4666 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4667 }
4668
4669 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4670 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4671
4672 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4673 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4674 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4675 return idxReg;
4676 }
4677 fVars &= ~RT_BIT_32(idxVar);
4678 }
4679 }
4680
4681 AssertFailed();
4682 return UINT8_MAX;
4683}
4684
4685
4686/**
4687 * Flushes a set of guest register shadow copies.
4688 *
4689 * This is usually done after calling a threaded function or a C-implementation
4690 * of an instruction.
4691 *
4692 * @param pReNative The native recompile state.
4693 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4694 */
4695DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4696{
4697 /*
4698 * Reduce the mask by what's currently shadowed.
4699 */
4700 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4701 fGstSimdRegs &= bmGstSimdRegShadows;
4702 if (fGstSimdRegs)
4703 {
4704 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4705 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4706 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4707 if (bmGstSimdRegShadowsNew)
4708 {
4709 /*
4710 * Partial.
4711 */
4712 do
4713 {
4714 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4715 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4716 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4717 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4718 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4719 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4720
4721 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4722 fGstSimdRegs &= ~fInThisHstReg;
4723 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4724 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4725 if (!fGstRegShadowsNew)
4726 {
4727 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4728 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4729 }
4730 } while (fGstSimdRegs != 0);
4731 }
4732 else
4733 {
4734 /*
4735 * Clear all.
4736 */
4737 do
4738 {
4739 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4740 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4741 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4742 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4743 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4744 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4745
4746 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4747 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4748 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4749 } while (fGstSimdRegs != 0);
4750 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
4751 }
4752 }
4753}
4754
4755
4756/**
4757 * Allocates a temporary host SIMD register.
4758 *
4759 * This may emit code to save register content onto the stack in order to free
4760 * up a register.
4761 *
4762 * @returns The host register number; throws VBox status code on failure,
4763 * so no need to check the return value.
4764 * @param pReNative The native recompile state.
4765 * @param poff Pointer to the variable with the code buffer position.
4766 * This will be update if we need to move a variable from
4767 * register to stack in order to satisfy the request.
4768 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4769 * registers (@c true, default) or the other way around
4770 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4771 */
4772DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4773{
4774 /*
4775 * Try find a completely unused register, preferably a call-volatile one.
4776 */
4777 uint8_t idxSimdReg;
4778 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4779 & ~pReNative->Core.bmHstRegsWithGstShadow
4780 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
4781 if (fRegs)
4782 {
4783 if (fPreferVolatile)
4784 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4785 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4786 else
4787 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4788 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4789 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4790 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4791
4792 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4793 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4794 }
4795 else
4796 {
4797 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
4798 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4799 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4800 }
4801
4802 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4803 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4804}
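
/*
 * Illustrative sketch (not from the original source): a typical temporary
 * SIMD register lifetime, assuming pReNative and off from the surrounding
 * recompiler context (the default is to prefer call-volatile registers):
 *
 *     uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *     ...emit code that uses idxSimdRegTmp as scratch...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 */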
4805
4806
4807/**
4808 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
4809 * registers.
4810 *
4811 * @returns The host register number; throws VBox status code on failure,
4812 * so no need to check the return value.
4813 * @param pReNative The native recompile state.
4814 * @param poff Pointer to the variable with the code buffer position.
4815 * This will be updated if we need to move a variable from
4816 * register to stack in order to satisfy the request.
4817 * @param fRegMask Mask of acceptable registers.
4818 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4819 * registers (@c true, default) or the other way around
4820 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4821 */
4822DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4823 bool fPreferVolatile /*= true*/)
4824{
4825 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4826 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4827
4828 /*
4829 * Try find a completely unused register, preferably a call-volatile one.
4830 */
4831 uint8_t idxSimdReg;
4832 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4833 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4834 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
4835 & fRegMask;
4836 if (fRegs)
4837 {
4838 if (fPreferVolatile)
4839 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4840 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4841 else
4842 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4843 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4844 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4845 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4846
4847 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4848 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4849 }
4850 else
4851 {
4852 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4853 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4854 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4855 }
4856
4857 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4858 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4859}
4860
4861
4862/**
4863 * Sets the indicator for which part of the given SIMD register has valid data loaded.
4864 *
4865 * @param pReNative The native recompile state.
4866 * @param idxHstSimdReg The host SIMD register to update the state for.
4867 * @param enmLoadSz The load size to set.
4868 */
4869DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
4870 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
4871{
4872 /* Everything valid already? -> nothing to do. */
4873 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4874 return;
4875
4876 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
4877 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
4878 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
4879 {
4880 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
4881 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
4882 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
4883 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
4884 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
4885 }
4886}
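
/*
 * Illustrative sketch (not from the original source): marking first the low
 * and then the high 128 bits as loaded leaves the register in the 256-bit
 * loaded state, assuming idxHstSimdReg came freshly from the allocator above
 * (i.e. started out in the invalid state):
 *
 *     iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
 *     iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
 *     ...enmLoaded is now kIemNativeGstSimdRegLdStSz_256...
 */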
4887
4888
4889static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
4890 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
4891{
4892 /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
4893 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
4894 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4895 {
4896# ifdef RT_ARCH_ARM64
4897 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
4898 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
4899# endif
4900
4901 if (idxHstSimdRegDst != idxHstSimdRegSrc)
4902 {
4903 switch (enmLoadSzDst)
4904 {
4905 case kIemNativeGstSimdRegLdStSz_256:
4906 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4907 break;
4908 case kIemNativeGstSimdRegLdStSz_Low128:
4909 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4910 break;
4911 case kIemNativeGstSimdRegLdStSz_High128:
4912 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4913 break;
4914 default:
4915 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4916 }
4917
4918 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
4919 }
4920 }
4921 else
4922 {
4923 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
4924 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
4925 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
4926 }
4927
4928 return off;
4929}
4930
4931
4932/**
4933 * Allocates a temporary host SIMD register for keeping a guest
4934 * SIMD register value.
4935 *
4936 * Since we may already have a register holding the guest register value,
4937 * code will be emitted to do the loading if that's not the case. Code may also
4938 * be emitted if we have to free up a register to satisfy the request.
4939 *
4940 * @returns The host register number; throws VBox status code on failure, so no
4941 * need to check the return value.
4942 * @param pReNative The native recompile state.
4943 * @param poff Pointer to the variable with the code buffer
4944 * position. This will be updated if we need to move a
4945 * variable from register to stack in order to satisfy
4946 * the request.
4947 * @param enmGstSimdReg The guest SIMD register that is to be updated.
4948 * @param enmIntendedUse How the caller will be using the host register.
4949 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4950 * register is okay (default). The ASSUMPTION here is
4951 * that the caller has already flushed all volatile
4952 * registers, so this is only applied if we allocate a
4953 * new register.
4954 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4955 */
4956DECL_HIDDEN_THROW(uint8_t)
4957iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
4958 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4959 bool fNoVolatileRegs /*= false*/)
4960{
4961 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
4962#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
4963 AssertMsg( pReNative->idxCurCall == 0
4964 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4965 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4966 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4967 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4968 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
4969 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
4970#endif
4971#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4972 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4973#endif
4974 uint32_t const fRegMask = !fNoVolatileRegs
4975 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
4976 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4977
4978 /*
4979 * First check if the guest register value is already in a host register.
4980 */
4981 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
4982 {
4983 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4984 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4985 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
4986 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
4987
4988 /* It's not supposed to be allocated... */
4989 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
4990 {
4991 /*
4992 * If the register will trash the guest shadow copy, try find a
4993 * completely unused register we can use instead. If that fails,
4994 * we need to disassociate the host reg from the guest reg.
4995 */
4996 /** @todo would be nice to know if preserving the register is in any way helpful. */
4997 /* If the purpose is calculations, try to duplicate the register value as
4998 we'll be clobbering the shadow. */
4999 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5000 && ( ~pReNative->Core.bmHstSimdRegs
5001 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5002 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5003 {
5004 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5005
5006 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5007
5008 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5009 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5010 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5011 idxSimdReg = idxRegNew;
5012 }
5013 /* If the current register matches the restrictions, go ahead and allocate
5014 it for the caller. */
5015 else if (fRegMask & RT_BIT_32(idxSimdReg))
5016 {
5017 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5018 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5019 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5020 {
5021 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5022 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5023 else
5024 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5025 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5026 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5027 }
5028 else
5029 {
5030 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5031 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5032 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5033 }
5034 }
5035 /* Otherwise, allocate a register that satisfies the caller and transfer
5036 the shadowing if compatible with the intended use. (This basically
5037 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5038 else
5039 {
5040 Assert(fNoVolatileRegs);
5041 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5042 !fNoVolatileRegs
5043 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5044 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5045 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5046 {
5047 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5048 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5049 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5050 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5051 }
5052 else
5053 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5054 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5055 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5056 idxSimdReg = idxRegNew;
5057 }
5058 }
5059 else
5060 {
5061 /*
5062 * Oops. Shadowed guest register already allocated!
5063 *
5064 * Allocate a new register, copy the value and, if updating, the
5065 * guest shadow copy assignment to the new register.
5066 */
5067 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5068 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5069 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5070 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5071
5072 /** @todo share register for readonly access. */
5073 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5074 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5075
5076 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5077 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5078 else
5079 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5080
5081 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5082 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5083 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5084 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5085 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5086 else
5087 {
5088 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5089 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5090 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5091 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5092 }
5093 idxSimdReg = idxRegNew;
5094 }
5095 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5096
5097#ifdef VBOX_STRICT
5098 /* Strict builds: Check that the value is correct. */
5099 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5100 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5101#endif
5102
5103 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5104 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5105 {
5106# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5107 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5108 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5109# endif
5110
5111 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5112 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5113 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5114 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5115 else
5116 {
5117 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5118 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5119 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5120 }
5121 }
5122
5123 return idxSimdReg;
5124 }
5125
5126 /*
5127 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5128 */
5129 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5130
5131 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5132 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5133 else
5134 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5135
5136 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5137 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5138
5139 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5140 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5141 {
5142# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5143 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5144 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5145# endif
5146
5147 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5148 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5149 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5150 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5151 else
5152 {
5153 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5154 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5155 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5156 }
5157 }
5158
5159 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5160 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5161
5162 return idxRegNew;
5163}
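
/*
 * Illustrative sketch (not from the original source): fetching the low
 * 128 bits of guest ymm0 for an update, assuming pReNative and off from the
 * surrounding recompiler context.  The dirty tracking set up above takes
 * care of writing the result back to CPUMCTX later:
 *
 *     uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                        IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                                        kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                        kIemNativeGstRegUse_ForUpdate);
 *     ...emit code modifying the low 128 bits of idxSimdReg...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */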
5164
5165
5166/**
5167 * Flushes guest SIMD register shadow copies held by a set of host registers.
5168 *
5169 * This is used whenever calling an external helper to ensure that we don't carry on
5170 * with any guest shadows in volatile registers, as these will get corrupted by the callee.
5171 *
5172 * @param pReNative The native recompile state.
5173 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5174 */
5175DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5176{
5177 /*
5178 * Reduce the mask by what's currently shadowed.
5179 */
5180 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5181 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5182 if (fHstSimdRegs)
5183 {
5184 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5185 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5186 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5187 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5188 if (bmHstSimdRegsWithGstShadowNew)
5189 {
5190 /*
5191 * Partial (likely).
5192 */
5193 uint64_t fGstShadows = 0;
5194 do
5195 {
5196 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5197 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5198 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5199 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5200 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5201 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5202
5203 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5204 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5205 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5206 } while (fHstSimdRegs != 0);
5207 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5208 }
5209 else
5210 {
5211 /*
5212 * Clear all.
5213 */
5214 do
5215 {
5216 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5217 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5218 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5219 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5220 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5221 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5222
5223 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5224 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5225 } while (fHstSimdRegs != 0);
5226 pReNative->Core.bmGstSimdRegShadows = 0;
5227 }
5228 }
5229}
5230#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5231
5232
5233
5234/*********************************************************************************************************************************
5235* Code emitters for flushing pending guest register writes and sanity checks *
5236*********************************************************************************************************************************/
5237
5238#ifdef VBOX_STRICT
5239/**
5240 * Does internal register allocator sanity checks.
5241 */
5242DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5243{
5244 /*
5245 * Iterate host registers building a guest shadowing set.
5246 */
5247 uint64_t bmGstRegShadows = 0;
5248 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5249 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5250 while (bmHstRegsWithGstShadow)
5251 {
5252 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5253 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5254 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5255
5256 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5257 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5258 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5259 bmGstRegShadows |= fThisGstRegShadows;
5260 while (fThisGstRegShadows)
5261 {
5262 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5263 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5264 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5265 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5266 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5267 }
5268 }
5269 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5270 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5271 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5272
5273 /*
5274 * Now the other way around, checking the guest to host index array.
5275 */
5276 bmHstRegsWithGstShadow = 0;
5277 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5278 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5279 while (bmGstRegShadows)
5280 {
5281 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5282 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5283 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5284
5285 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5286 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5287 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5288 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5289 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5290 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5291 }
5292 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5293 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5294 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5295}
5296#endif /* VBOX_STRICT */
5297
5298
5299/**
5300 * Flushes any delayed guest register writes.
5301 *
5302 * This must be called prior to calling CImpl functions and any helpers that use
5303 * the guest state (like raising exceptions) and such.
5304 *
5305 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5306 * the caller if it wishes to do so.
5307 */
5308DECL_HIDDEN_THROW(uint32_t)
5309iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5310{
5311#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5312 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5313 off = iemNativeEmitPcWriteback(pReNative, off);
5314#else
5315 RT_NOREF(pReNative, fGstShwExcept);
5316#endif
5317
5318#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5319 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5320#endif
5321
5322#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5323 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5324#endif
5325
5326 return off;
5327}
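
/*
 * Illustrative sketch (not from the original source): flushing everything
 * (empty exception masks) before emitting a call to a CImpl style helper,
 * assuming pReNative and off from the surrounding recompiler context.  In
 * practice this worker is presumably reached via a cheaper inline wrapper,
 * but the effect is the same:
 *
 *     off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0, 0);
 *     ...emit the CImpl/helper call...
 */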
5328
5329
5330#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5331/**
5332 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5333 */
5334DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5335{
5336 Assert(pReNative->Core.offPc);
5337# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5338 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5339 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5340# endif
5341
5342# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5343 /* Allocate a temporary PC register. */
5344 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5345
5346 /* Perform the addition and store the result. */
5347 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5348 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5349
5350 /* Free but don't flush the PC register. */
5351 iemNativeRegFreeTmp(pReNative, idxPcReg);
5352# else
5353 /* Compare the shadow with the context value, they should match. */
5354 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5355 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5356# endif
5357
5358 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5359 pReNative->Core.offPc = 0;
5360 pReNative->Core.cInstrPcUpdateSkipped = 0;
5361
5362 return off;
5363}
5364#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5365
5366
5367/*********************************************************************************************************************************
5368* Code Emitters (larger snippets) *
5369*********************************************************************************************************************************/
5370
5371/**
5372 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5373 * extending to 64-bit width.
5374 *
5375 * @returns New code buffer offset on success, UINT32_MAX on failure.
5376 * @param pReNative The native recompile state.
5377 * @param off The current code buffer position.
5378 * @param idxHstReg The host register to load the guest register value into.
5379 * @param enmGstReg The guest register to load.
5380 *
5381 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
5382 * that is something the caller needs to do if applicable.
5383 */
5384DECL_HIDDEN_THROW(uint32_t)
5385iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5386{
5387 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5388 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5389
5390 switch (g_aGstShadowInfo[enmGstReg].cb)
5391 {
5392 case sizeof(uint64_t):
5393 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5394 case sizeof(uint32_t):
5395 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5396 case sizeof(uint16_t):
5397 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5398#if 0 /* not present in the table. */
5399 case sizeof(uint8_t):
5400 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5401#endif
5402 default:
5403 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5404 }
5405}
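
/*
 * Illustrative sketch (not from the original source): loading the guest RIP
 * shadow into the fixed temporary register without establishing any shadow
 * association, assuming pReNative and off from the surrounding context:
 *
 *     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 */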
5406
5407
5408#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5409/**
5410 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5411 *
5412 * @returns New code buffer offset on success, UINT32_MAX on failure.
5413 * @param pReNative The recompiler state.
5414 * @param off The current code buffer position.
5415 * @param idxHstSimdReg The host register to load the guest register value into.
5416 * @param enmGstSimdReg The guest register to load.
5417 * @param enmLoadSz The load size of the register.
5418 *
5419 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
5420 * that is something the caller needs to do if applicable.
5421 */
5422DECL_HIDDEN_THROW(uint32_t)
5423iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5424 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5425{
5426 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5427
5428 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5429 switch (enmLoadSz)
5430 {
5431 case kIemNativeGstSimdRegLdStSz_256:
5432 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5433 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5434 case kIemNativeGstSimdRegLdStSz_Low128:
5435 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5436 case kIemNativeGstSimdRegLdStSz_High128:
5437 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5438 default:
5439 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5440 }
5441}
5442#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5443
5444#ifdef VBOX_STRICT
5445
5446/**
5447 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5448 *
5449 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5450 * Trashes EFLAGS on AMD64.
5451 */
5452DECL_HIDDEN_THROW(uint32_t)
5453iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5454{
5455# ifdef RT_ARCH_AMD64
5456 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5457
5458 /* rol reg64, 32 */
5459 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5460 pbCodeBuf[off++] = 0xc1;
5461 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5462 pbCodeBuf[off++] = 32;
5463
5464 /* test reg32, ffffffffh */
5465 if (idxReg >= 8)
5466 pbCodeBuf[off++] = X86_OP_REX_B;
5467 pbCodeBuf[off++] = 0xf7;
5468 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5469 pbCodeBuf[off++] = 0xff;
5470 pbCodeBuf[off++] = 0xff;
5471 pbCodeBuf[off++] = 0xff;
5472 pbCodeBuf[off++] = 0xff;
5473
5474 /* je/jz +1 */
5475 pbCodeBuf[off++] = 0x74;
5476 pbCodeBuf[off++] = 0x01;
5477
5478 /* int3 */
5479 pbCodeBuf[off++] = 0xcc;
5480
5481 /* rol reg64, 32 */
5482 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5483 pbCodeBuf[off++] = 0xc1;
5484 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5485 pbCodeBuf[off++] = 32;
5486
5487# elif defined(RT_ARCH_ARM64)
5488 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5489 /* lsr tmp0, reg64, #32 */
5490 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5491 /* cbz tmp0, +1 */
5492 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5493 /* brk #0x1100 */
5494 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5495
5496# else
5497# error "Port me!"
5498# endif
5499 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5500 return off;
5501}
5502
5503
5504/**
5505 * Emits code that checks that the content of register @a idxReg is the same
5506 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5507 * instruction if that's not the case.
5508 *
5509 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5510 * Trashes EFLAGS on AMD64.
5511 */
5512DECL_HIDDEN_THROW(uint32_t)
5513iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5514{
5515#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5516 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5517 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5518 return off;
5519#endif
5520
5521# ifdef RT_ARCH_AMD64
5522 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5523
5524 /* cmp reg, [mem] */
5525 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5526 {
5527 if (idxReg >= 8)
5528 pbCodeBuf[off++] = X86_OP_REX_R;
5529 pbCodeBuf[off++] = 0x38;
5530 }
5531 else
5532 {
5533 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5534 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5535 else
5536 {
5537 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5538 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5539 else
5540 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5541 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5542 if (idxReg >= 8)
5543 pbCodeBuf[off++] = X86_OP_REX_R;
5544 }
5545 pbCodeBuf[off++] = 0x39;
5546 }
5547 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5548
5549 /* je/jz +1 */
5550 pbCodeBuf[off++] = 0x74;
5551 pbCodeBuf[off++] = 0x01;
5552
5553 /* int3 */
5554 pbCodeBuf[off++] = 0xcc;
5555
5556 /* For values smaller than the register size, we must check that the rest
5557 of the register is all zeros. */
5558 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5559 {
5560 /* test reg64, imm32 */
5561 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5562 pbCodeBuf[off++] = 0xf7;
5563 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5564 pbCodeBuf[off++] = 0;
5565 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5566 pbCodeBuf[off++] = 0xff;
5567 pbCodeBuf[off++] = 0xff;
5568
5569 /* je/jz +1 */
5570 pbCodeBuf[off++] = 0x74;
5571 pbCodeBuf[off++] = 0x01;
5572
5573 /* int3 */
5574 pbCodeBuf[off++] = 0xcc;
5575 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5576 }
5577 else
5578 {
5579 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5580 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5581 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5582 }
5583
5584# elif defined(RT_ARCH_ARM64)
5585 /* mov TMP0, [gstreg] */
5586 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5587
5588 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5589 /* sub tmp0, tmp0, idxReg */
5590 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5591 /* cbz tmp0, +1 */
5592 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5593 /* brk #0x1000+enmGstReg */
5594 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5595 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5596
5597# else
5598# error "Port me!"
5599# endif
5600 return off;
5601}
5602
5603
5604# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5605# ifdef RT_ARCH_AMD64
5606/**
5607 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
5608 */
5609DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5610{
5611 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5612 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5613 if (idxSimdReg >= 8)
5614 pbCodeBuf[off++] = X86_OP_REX_R;
5615 pbCodeBuf[off++] = 0x0f;
5616 pbCodeBuf[off++] = 0x38;
5617 pbCodeBuf[off++] = 0x29;
5618 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5619
5620 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5621 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5622 pbCodeBuf[off++] = X86_OP_REX_W
5623 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5624 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5625 pbCodeBuf[off++] = 0x0f;
5626 pbCodeBuf[off++] = 0x3a;
5627 pbCodeBuf[off++] = 0x16;
5628 pbCodeBuf[off++] = 0xeb;
5629 pbCodeBuf[off++] = 0x00;
5630
5631 /* cmp tmp0, 0xffffffffffffffff. */
5632 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5633 pbCodeBuf[off++] = 0x83;
5634 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5635 pbCodeBuf[off++] = 0xff;
5636
5637 /* je/jz +1 */
5638 pbCodeBuf[off++] = 0x74;
5639 pbCodeBuf[off++] = 0x01;
5640
5641 /* int3 */
5642 pbCodeBuf[off++] = 0xcc;
5643
5644 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5645 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5646 pbCodeBuf[off++] = X86_OP_REX_W
5647 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5648 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5649 pbCodeBuf[off++] = 0x0f;
5650 pbCodeBuf[off++] = 0x3a;
5651 pbCodeBuf[off++] = 0x16;
5652 pbCodeBuf[off++] = 0xeb;
5653 pbCodeBuf[off++] = 0x01;
5654
5655 /* cmp tmp0, 0xffffffffffffffff. */
5656 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5657 pbCodeBuf[off++] = 0x83;
5658 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5659 pbCodeBuf[off++] = 0xff;
5660
5661 /* je/jz +1 */
5662 pbCodeBuf[off++] = 0x74;
5663 pbCodeBuf[off++] = 0x01;
5664
5665 /* int3 */
5666 pbCodeBuf[off++] = 0xcc;
5667
5668 return off;
5669}
5670# endif
5671
5672
5673/**
5674 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5675 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5676 * instruction if that's not the case.
5677 *
5678 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5679 * Trashes EFLAGS on AMD64.
5680 */
5681DECL_HIDDEN_THROW(uint32_t)
5682iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5683 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5684{
5685 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5686 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5687 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5688 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5689 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5690 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5691 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5692 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5693 return off;
5694
5695# ifdef RT_ARCH_AMD64
5696 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5697 {
5698 /* movdqa vectmp0, idxSimdReg */
5699 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5700
5701 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5702
5703 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5704 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5705 }
5706
5707 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5708 {
5709 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5710 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5711
5712 /* vextracti128 vectmp0, idxSimdReg, 1 */
5713 pbCodeBuf[off++] = X86_OP_VEX3;
5714 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5715 | X86_OP_VEX3_BYTE1_X
5716 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5717 | 0x03; /* Opcode map */
5718 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5719 pbCodeBuf[off++] = 0x39;
5720 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5721 pbCodeBuf[off++] = 0x01;
5722
5723 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5724 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5725 }
5726# elif defined(RT_ARCH_ARM64)
5727 /* mov vectmp0, [gstreg] */
5728 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5729
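/* Strategy (added note): XOR the CPUMCTX copy just loaded into vectmp0 with the
   host shadow register; the result is all zeros iff the two 128-bit values are
   identical. UADDLV then sums all 16 bytes into a single lane, so one scalar
   compare against zero decides whether to hit the breakpoint. */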
5730 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5731 {
5732 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5733 /* eor vectmp0, vectmp0, idxSimdReg */
5734 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5735 /* uaddlv vectmp0, vectmp0.16B */
5736 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
5737 /* umov tmp0, vectmp0.H[0] */
5738 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5739 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5740 /* cbz tmp0, +1 */
5741 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5742 /* brk #0x1000+enmGstReg */
5743 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5744 }
5745
5746 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5747 {
5748 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5749 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
5750 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
5751 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
5752 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
5753 /* umov tmp0, (vectmp0 + 1).H[0] */
5754 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
5755 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5756 /* cbz tmp0, +1 */
5757 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5758 /* brk #0x1000+enmGstReg */
5759 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5760 }
5761
5762# else
5763# error "Port me!"
5764# endif
5765
5766 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5767 return off;
5768}
5769# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5770
5771
5772/**
5773 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5774 * important bits.
5775 *
5776 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5777 * Trashes EFLAGS on AMD64.
5778 */
5779DECL_HIDDEN_THROW(uint32_t)
5780iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5781{
5782 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5783 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5784 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5785 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5786
5787#ifdef RT_ARCH_AMD64
5788 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5789
5790 /* je/jz +1 */
5791 pbCodeBuf[off++] = 0x74;
5792 pbCodeBuf[off++] = 0x01;
5793
5794 /* int3 */
5795 pbCodeBuf[off++] = 0xcc;
5796
5797# elif defined(RT_ARCH_ARM64)
5798 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5799
5800 /* b.eq +1 */
5801 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5802 /* brk #0x2000 */
5803 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5804
5805# else
5806# error "Port me!"
5807# endif
5808 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5809
5810 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5811 return off;
5812}
5813
5814#endif /* VBOX_STRICT */
5815
5816
5817#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5818/**
5819 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
5820 */
5821DECL_HIDDEN_THROW(uint32_t)
5822iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
5823{
5824 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
5825
5826 fEflNeeded &= X86_EFL_STATUS_BITS;
5827 if (fEflNeeded)
5828 {
5829# ifdef RT_ARCH_AMD64
5830 /* test dword [pVCpu + offVCpu], imm32 */
5831 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5832 if (fEflNeeded <= 0xff)
5833 {
5834 pCodeBuf[off++] = 0xf6;
5835 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5836 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5837 }
5838 else
5839 {
5840 pCodeBuf[off++] = 0xf7;
5841 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5842 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5843 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
5844 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
5845 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
5846 }
5847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5848
5849# else
5850 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5851 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
5852 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
5853# ifdef RT_ARCH_ARM64
5854 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
5855 off = iemNativeEmitBrk(pReNative, off, 0x7777);
5856# else
5857# error "Port me!"
5858# endif
5859 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5860# endif
5861 }
5862 return off;
5863}
5864#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
5865
5866
5867/**
5868 * Emits code for checking the return code of a call and rcPassUp, returning
5869 * from the code if either is non-zero.
5870 */
5871DECL_HIDDEN_THROW(uint32_t)
5872iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5873{
5874#ifdef RT_ARCH_AMD64
5875 /*
5876 * AMD64: eax = call status code.
5877 */
5878
5879 /* edx = rcPassUp */
5880 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5881# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5882 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5883# endif
5884
5885 /* edx = eax | rcPassUp */
5886 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5887 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5888 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5890
5891 /* Jump to non-zero status return path. */
5892 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5893
5894 /* done. */
5895
5896#elif RT_ARCH_ARM64
5897 /*
5898 * ARM64: w0 = call status code.
5899 */
5900# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5901 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5902# endif
5903 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5904
5905 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5906
5907 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5908
5909 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5910 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5911 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5912
5913#else
5914# error "port me"
5915#endif
5916 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5917 RT_NOREF_PV(idxInstr);
5918 return off;
5919}
5920
5921
5922/**
5923 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5924 * raising a \#GP(0) if it isn't.
5925 *
5926 * @returns New code buffer offset; throws VBox status code on error.
5927 * @param pReNative The native recompile state.
5928 * @param off The code buffer offset.
5929 * @param idxAddrReg The host register with the address to check.
5930 * @param idxInstr The current instruction.
5931 */
5932DECL_HIDDEN_THROW(uint32_t)
5933iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5934{
5935 /*
5936 * Make sure we don't have any outstanding guest register writes as we may
5937 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5938 */
5939 off = iemNativeRegFlushPendingWrites(pReNative, off);
5940
5941#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5942 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5943#else
5944 RT_NOREF(idxInstr);
5945#endif
5946
5947#ifdef RT_ARCH_AMD64
5948 /*
5949 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5950 * return raisexcpt();
5951 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5952 */
5953 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5954
5955 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5956 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5957 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5958 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5959 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5960
5961 iemNativeRegFreeTmp(pReNative, iTmpReg);
5962
5963#elif defined(RT_ARCH_ARM64)
5964 /*
5965 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5966 * return raisexcpt();
5967 * ----
5968 * mov x1, 0x800000000000
5969 * add x1, x0, x1
5970 * cmp xzr, x1, lsr 48
5971 * b.ne .Lraisexcpt
5972 */
5973 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5974
5975 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5976 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5977 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5978 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5979
5980 iemNativeRegFreeTmp(pReNative, iTmpReg);
5981
5982#else
5983# error "Port me"
5984#endif
5985 return off;
5986}
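
/*
 * Illustration only, not used by the recompiler: a minimal sketch showing that
 * the two emitter variants above agree with the textbook canonical test (bits
 * 63:47 of the address must all equal bit 47).  The function name below is made
 * up purely for this sketch.
 */
#if 0
static bool iemDemoIsCanonicalAddr(uint64_t uAddr)
{
    /* Textbook form: the address must be in the low or the high canonical half. */
    bool const fCanonical  =    uAddr <  UINT64_C(0x0000800000000000)
                             || uAddr >= UINT64_C(0xffff800000000000);
    /* AMD64 variant above: only the top 32 bits are needed. */
    bool const fCanonical2 = ((uint32_t)((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
    /* ARM64 variant above: one 64-bit add and a 48-bit shift. */
    bool const fCanonical3 = ((uAddr + UINT64_C(0x800000000000)) >> 48) == 0;
    Assert(fCanonical == fCanonical2 && fCanonical == fCanonical3);
    return fCanonical;
}
#endif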
5987
5988
5989/**
5990 * Emits code to check that the content of @a idxAddrReg is within the limit
5991 * of CS, raising a \#GP(0) if it isn't.
5992 *
5993 * @returns New code buffer offset; throws VBox status code on error.
5994 * @param pReNative The native recompile state.
5995 * @param off The code buffer offset.
5996 * @param idxAddrReg The host register (32-bit) with the address to
5997 * check.
5998 * @param idxInstr The current instruction.
5999 */
6000DECL_HIDDEN_THROW(uint32_t)
6001iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6002 uint8_t idxAddrReg, uint8_t idxInstr)
6003{
6004 /*
6005 * Make sure we don't have any outstanding guest register writes as we may
6006 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6007 */
6008 off = iemNativeRegFlushPendingWrites(pReNative, off);
6009
6010#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6011 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6012#else
6013 RT_NOREF(idxInstr);
6014#endif
6015
6016 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6017 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6018 kIemNativeGstRegUse_ReadOnly);
6019
6020 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6021 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6022
6023 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6024 return off;
6025}
6026
6027
6028/**
6029 * Emits a call to a CImpl function or something similar.
6030 */
6031DECL_HIDDEN_THROW(uint32_t)
6032iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6033 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6034{
6035 /* Writeback everything. */
6036 off = iemNativeRegFlushPendingWrites(pReNative, off);
6037
6038 /*
6039 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6040 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6041 */
6042 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6043 fGstShwFlush
6044 | RT_BIT_64(kIemNativeGstReg_Pc)
6045 | RT_BIT_64(kIemNativeGstReg_EFlags));
6046 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6047
6048 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6049
6050 /*
6051 * Load the parameters.
6052 */
6053#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6054 /* Special case the hidden VBOXSTRICTRC pointer. */
6055 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6056 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6057 if (cAddParams > 0)
6058 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6059 if (cAddParams > 1)
6060 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6061 if (cAddParams > 2)
6062 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6063 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6064
6065#else
6066 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6067 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6068 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6069 if (cAddParams > 0)
6070 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6071 if (cAddParams > 1)
6072 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6073 if (cAddParams > 2)
6074# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6075 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6076# else
6077 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6078# endif
6079#endif
6080
6081 /*
6082 * Make the call.
6083 */
6084 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6085
6086#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6087 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6088#endif
6089
6090 /*
6091 * Check the status code.
6092 */
6093 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6094}
6095
6096
6097/**
6098 * Emits a call to a threaded worker function.
6099 */
6100DECL_HIDDEN_THROW(uint32_t)
6101iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6102{
6103 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6104
6105 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6106 off = iemNativeRegFlushPendingWrites(pReNative, off);
6107
6108 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6109 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6110
6111#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6112 /* The threaded function may throw / long jmp, so set current instruction
6113 number if we're counting. */
6114 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6115#endif
6116
6117 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6118
6119#ifdef RT_ARCH_AMD64
6120 /* Load the parameters and emit the call. */
6121# ifdef RT_OS_WINDOWS
6122# ifndef VBOXSTRICTRC_STRICT_ENABLED
6123 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6124 if (cParams > 0)
6125 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6126 if (cParams > 1)
6127 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6128 if (cParams > 2)
6129 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6130# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6131 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6132 if (cParams > 0)
6133 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6134 if (cParams > 1)
6135 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6136 if (cParams > 2)
6137 {
6138 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6139 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6140 }
6141 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6142# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6143# else
6144 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6145 if (cParams > 0)
6146 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6147 if (cParams > 1)
6148 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6149 if (cParams > 2)
6150 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6151# endif
6152
6153 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6154
6155# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6156 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6157# endif
6158
6159#elif RT_ARCH_ARM64
6160 /*
6161 * ARM64:
6162 */
6163 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6164 if (cParams > 0)
6165 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6166 if (cParams > 1)
6167 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6168 if (cParams > 2)
6169 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6170
6171 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6172
6173#else
6174# error "port me"
6175#endif
6176
6177 /*
6178 * Check the status code.
6179 */
6180 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6181
6182 return off;
6183}
6184
6185#ifdef VBOX_WITH_STATISTICS
6186
6187/**
6188 * Emits code to update the thread call statistics.
6189 */
6190DECL_INLINE_THROW(uint32_t)
6191iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6192{
6193 /*
6194 * Update threaded function stats.
6195 */
6196 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6197 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6198# if defined(RT_ARCH_ARM64)
6199 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6200 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6201 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6202 iemNativeRegFreeTmp(pReNative, idxTmp1);
6203 iemNativeRegFreeTmp(pReNative, idxTmp2);
6204# else
6205 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6206# endif
6207 return off;
6208}
6209
6210
6211/**
6212 * Emits code to update the TB exit reason statistics.
6213 */
6214DECL_INLINE_THROW(uint32_t)
6215iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6216{
6217 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6218 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6219 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6220 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6221 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6222
6223 return off;
6224}
6225
6226#endif /* VBOX_WITH_STATISTICS */
6227
6228/**
6229 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6230 */
6231static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6232{
6233 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6234 if (idxLabel != UINT32_MAX)
6235 {
6236 iemNativeLabelDefine(pReNative, idxLabel, off);
6237 /* set the return status */
6238 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6239 /* jump back to the return sequence. */
6240 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6241 }
6242 return off;
6243}
6244
6245
6246/**
6247 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6248 */
6249static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6250{
6251 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6252 if (idxLabel != UINT32_MAX)
6253 {
6254 iemNativeLabelDefine(pReNative, idxLabel, off);
6255 /* set the return status */
6256 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6257 /* jump back to the return sequence. */
6258 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6259 }
6260 return off;
6261}
6262
6263
6264/**
6265 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6266 */
6267static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6268{
6269 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6270 if (idxLabel != UINT32_MAX)
6271 {
6272 iemNativeLabelDefine(pReNative, idxLabel, off);
6273 /* set the return status */
6274 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6275 /* jump back to the return sequence. */
6276 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6277 }
6278 return off;
6279}
6280
6281
6282/**
6283 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6284 */
6285static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6286{
6287 /*
6288 * Generate the rc + rcPassUp fiddling code if needed.
6289 */
6290 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6291 if (idxLabel != UINT32_MAX)
6292 {
6293 iemNativeLabelDefine(pReNative, idxLabel, off);
6294
6295 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6296#ifdef RT_ARCH_AMD64
6297# ifdef RT_OS_WINDOWS
6298# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6299 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6300# endif
6301 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6302 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6303# else
6304 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6305 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6306# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6307 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6308# endif
6309# endif
6310# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6311 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6312# endif
6313
6314#else
6315 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6316 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6317 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6318#endif
6319
6320 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6321 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6322 }
6323 return off;
6324}
6325
6326
6327/**
6328 * Emits a standard epilog.
6329 */
6330static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6331{
6332 *pidxReturnLabel = UINT32_MAX;
6333
6334 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6335 off = iemNativeRegFlushPendingWrites(pReNative, off);
6336
6337 /*
6338 * Successful return, so clear the return register (eax, w0).
6339 */
6340 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6341 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6342
6343 /*
6344 * Define label for common return point.
6345 */
6346 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6347 *pidxReturnLabel = idxReturn;
6348
6349 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6350
6351 /*
6352 * Restore registers and return.
6353 */
6354#ifdef RT_ARCH_AMD64
6355 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6356
6357 /* Reposition rsp at the r15 restore point. */
6358 pbCodeBuf[off++] = X86_OP_REX_W;
6359 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6361 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6362
6363 /* Pop non-volatile registers and return */
6364 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6365 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6366 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6367 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6368 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6369 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6370 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6371 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6372# ifdef RT_OS_WINDOWS
6373 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6374 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6375# endif
6376 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6377 pbCodeBuf[off++] = 0xc9; /* leave */
6378 pbCodeBuf[off++] = 0xc3; /* ret */
6379 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6380
6381#elif RT_ARCH_ARM64
6382 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6383
6384 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6385 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6386 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6387 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6388 IEMNATIVE_FRAME_VAR_SIZE / 8);
6389 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6390 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6391 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6392 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6393 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6394 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6395 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6396 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6397 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6398 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6399 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6400 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6401
6402 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6403 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6404 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6405 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6406
6407 /* retab / ret */
6408# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6409 if (1)
6410 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6411 else
6412# endif
6413 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6414
6415#else
6416# error "port me"
6417#endif
6418 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6419
6420 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6421 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6422
6423 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6424}
6425
6426
6427#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6428/**
6429 * Emits a standard prolog.
6430 */
6431static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6432{
6433#ifdef RT_ARCH_AMD64
6434 /*
6435 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6436 * reserving 64 bytes for stack variables plus 4 non-register argument
6437 * slots. Fixed register assignment: xBX = pVCpu.
6438 *
6439 * Since we always do the same register spilling, we can use the same
6440 * unwind description for all the code.
6441 */
6442 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6443 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6444 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6445 pbCodeBuf[off++] = 0x8b;
6446 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6447 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6448 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6449# ifdef RT_OS_WINDOWS
6450 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6451 pbCodeBuf[off++] = 0x8b;
6452 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6453 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6454 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6455# else
6456 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6457 pbCodeBuf[off++] = 0x8b;
6458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6459# endif
6460 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6461 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6462 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6463 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6464 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6465 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6466 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6467 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6468
6469# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6470 /* Save the frame pointer. */
6471 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6472# endif
6473
6474 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6475 X86_GREG_xSP,
6476 IEMNATIVE_FRAME_ALIGN_SIZE
6477 + IEMNATIVE_FRAME_VAR_SIZE
6478 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6479 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6480 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6481 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6482 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6483
6484#elif RT_ARCH_ARM64
6485 /*
6486 * We set up a stack frame exactly like on x86, only we have to push the
6487 * return address ourselves here. We save all non-volatile registers.
6488 */
6489 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6490
6491 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6492 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6493 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6494 * in any way conditional, so we just emit this instruction now and hope for the best... */
6495 /* pacibsp */
6496 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6497# endif
6498
6499 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6500 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6501 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6502 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6503 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6504 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6505 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6506 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6507 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6508 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6509 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6510 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6511 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6512 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6513 /* Save the BP and LR (ret address) registers at the top of the frame. */
6514 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6515 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6516 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6517 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6518 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6519 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6520
6521 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6522 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6523
6524 /* mov r28, r0 */
6525 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6526 /* mov r27, r1 */
6527 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6528
6529# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6530 /* Save the frame pointer. */
6531 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6532 ARMV8_A64_REG_X2);
6533# endif
6534
6535#else
6536# error "port me"
6537#endif
6538 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6539 return off;
6540}
6541#endif
6542
6543
6544/*********************************************************************************************************************************
6545* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6546*********************************************************************************************************************************/
6547
6548/**
6549 * Internal worker that allocates a variable with kind set to
6550 * kIemNativeVarKind_Invalid and no current stack allocation.
6551 *
6552 * The kind will either be set by the caller or later when the variable is first
6553 * assigned a value.
6554 *
6555 * @returns Unpacked index.
6556 * @internal
6557 */
6558static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6559{
6560 Assert(cbType > 0 && cbType <= 64);
6561 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6562 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6563 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6564 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6565 pReNative->Core.aVars[idxVar].cbVar = cbType;
6566 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6567 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6568 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6569 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6570 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6571 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6572 pReNative->Core.aVars[idxVar].u.uValue = 0;
6573#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6574 pReNative->Core.aVars[idxVar].fSimdReg = false;
6575#endif
6576 return idxVar;
6577}
6578
6579
6580/**
6581 * Internal worker that allocates an argument variable w/o setting enmKind.
6582 *
6583 * @returns Unpacked index.
6584 * @internal
6585 */
6586static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6587{
6588 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6589 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6590 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6591
6592 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6593 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6594 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6595 return idxVar;
6596}
6597
6598
6599/**
6600 * Gets the stack slot for a stack variable, allocating one if necessary.
6601 *
6602 * Calling this function implies that the stack slot will contain a valid
6603 * variable value. The caller deals with any register currently assigned to the
6604 * variable, typically by spilling it into the stack slot.
6605 *
6606 * @returns The stack slot number.
6607 * @param pReNative The recompiler state.
6608 * @param idxVar The variable.
6609 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6610 */
6611DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6612{
6613 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6614 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6615 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6616
6617 /* Already got a slot? */
6618 uint8_t const idxStackSlot = pVar->idxStackSlot;
6619 if (idxStackSlot != UINT8_MAX)
6620 {
6621 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6622 return idxStackSlot;
6623 }
6624
6625 /*
6626 * A single slot is easy to allocate.
6627 * Allocate them from the top end, closest to BP, to reduce the displacement.
6628 */
6629 if (pVar->cbVar <= sizeof(uint64_t))
6630 {
6631 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6632 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6633 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6634 pVar->idxStackSlot = (uint8_t)iSlot;
6635 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6636 return (uint8_t)iSlot;
6637 }
6638
6639 /*
6640 * We need more than one stack slot.
6641 *
6642 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6643 */
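/* Worked example (illustration only): for a 32 byte variable the code below
   yields fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3 and
   fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, i.e. we look for a run of
   four free 8-byte slots starting at a slot index that is a multiple of four. */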
6644 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6645 Assert(pVar->cbVar <= 64);
6646 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6647 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6648 uint32_t bmStack = pReNative->Core.bmStack;
6649 while (bmStack != UINT32_MAX)
6650 {
6651 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6652 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6653 iSlot = (iSlot - 1) & ~fBitAlignMask;
6654 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6655 {
6656 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6657 pVar->idxStackSlot = (uint8_t)iSlot;
6658 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6659 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6660 return (uint8_t)iSlot;
6661 }
6662
6663 bmStack |= (fBitAllocMask << iSlot);
6664 }
6665 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6666}
6667
6668
6669/**
6670 * Changes the variable to a stack variable.
6671 *
6672 * Currently this is only possible to do the first time the variable is used;
6673 * switching later can be implemented but has not been done.
6674 *
6675 * @param pReNative The recompiler state.
6676 * @param idxVar The variable.
6677 * @throws VERR_IEM_VAR_IPE_2
6678 */
6679DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6680{
6681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6682 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6683 if (pVar->enmKind != kIemNativeVarKind_Stack)
6684 {
6685 /* We could in theory transition from immediate to stack as well, but it
6686 would involve the caller doing the work of storing the value on the stack. So,
6687 till that's required we only allow transition from invalid. */
6688 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6689 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6690 pVar->enmKind = kIemNativeVarKind_Stack;
6691
6692 /* Note! We don't allocate a stack slot here, that's only done when a
6693 slot is actually needed to hold a variable value. */
6694 }
6695}
6696
6697
6698/**
6699 * Sets the variable to a constant value.
6700 *
6701 * This does not require stack storage as we know the value and can always
6702 * reload it, unless of course it's referenced.
6703 *
6704 * @param pReNative The recompiler state.
6705 * @param idxVar The variable.
6706 * @param uValue The immediate value.
6707 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6708 */
6709DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6710{
6711 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6712 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6713 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6714 {
6715 /* Only simple transitions for now. */
6716 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6717 pVar->enmKind = kIemNativeVarKind_Immediate;
6718 }
6719 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6720
6721 pVar->u.uValue = uValue;
6722 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
6723 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
6724 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
6725}
6726
6727
6728/**
6729 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6730 *
6731 * This does not require stack storage as we know the value and can always
6732 * reload it. Loading is postponed till needed.
6733 *
6734 * @param pReNative The recompiler state.
6735 * @param idxVar The variable. Unpacked.
6736 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
6737 *
6738 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6739 * @internal
6740 */
6741static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6742{
6743 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6744 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6745
6746 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6747 {
6748 /* Only simple transitions for now. */
6749 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6750 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6751 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6752 }
6753 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6754
6755 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
6756
6757 /* Update the other variable, ensure it's a stack variable. */
6758 /** @todo handle variables with const values... that'll go boom now. */
6759 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6760 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
6761}
6762
6763
6764/**
6765 * Sets the variable to a reference (pointer) to a guest register reference.
6766 *
6767 * This does not require stack storage as we know the value and can always
6768 * reload it. Loading is postponed till needed.
6769 *
6770 * @param pReNative The recompiler state.
6771 * @param idxVar The variable.
6772 * @param enmRegClass The class of guest registers to reference.
6773 * @param idxReg The register within @a enmRegClass to reference.
6774 *
6775 * @throws VERR_IEM_VAR_IPE_2
6776 */
6777DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6778 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6779{
6780 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6781 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6782
6783 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
6784 {
6785 /* Only simple transitions for now. */
6786 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6787 pVar->enmKind = kIemNativeVarKind_GstRegRef;
6788 }
6789 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6790
6791 pVar->u.GstRegRef.enmClass = enmRegClass;
6792 pVar->u.GstRegRef.idx = idxReg;
6793}
6794
6795
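/**
 * Allocates an argument variable of @a cbType bytes for argument slot @a iArgNo,
 * returning the packed variable index.
 */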
6796DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6797{
6798 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6799}
6800
6801
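/**
 * Allocates an argument variable for slot @a iArgNo and sets it to the constant
 * @a uValue, truncated to @a cbType bytes.
 */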
6802DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6803{
6804 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6805
6806 /* Since we're using a generic uint64_t value type, we must truncate it if
6807 the variable is smaller, otherwise we may end up with too large a value when
6808 scaling up an imm8 w/ sign-extension.
6809
6810 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6811 in the bios, bx=1) when running on arm, because clang expects 16-bit
6812 register parameters to have bits 16 and up set to zero. Instead of
6813 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6814 CF value in the result. */
6815 switch (cbType)
6816 {
6817 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6818 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6819 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6820 }
6821 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6822 return idxVar;
6823}
6824
6825
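/**
 * Allocates an argument variable for slot @a iArgNo that references the (stack)
 * location of the local variable @a idxOtherVar.
 */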
6826DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6827{
6828 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
6829 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
6830 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6831 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6832 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6833 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6834
6835 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6836 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
6837 return idxArgVar;
6838}
6839
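/*
 * Note (illustrative): an argument allocated by iemNativeArgAllocLocalRef is a
 * pointer sized value.  When the call is emitted (see iemNativeEmitCallCommon
 * below), the kIemNativeVarKind_VarRef handling spills the referenced local
 * variable to its stack slot and loads the argument register with the address
 * of that slot, so the C helper receives a host pointer to the local's storage.
 */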
6840
6841DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6842{
6843 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6844 /* Don't set to stack now, leave that to the first use as for instance
6845 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6846 return idxVar;
6847}
6848
6849
6850DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6851{
6852 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6853
 6854    /* Since we're using a generic uint64_t value type, we must truncate it if
 6855       the variable is smaller, otherwise we may end up with a too large value when
 6856       scaling up an imm8 w/ sign-extension. */
6857 switch (cbType)
6858 {
6859 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6860 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6861 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6862 }
6863 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6864 return idxVar;
6865}
6866
6867
6868DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
6869{
6870 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6871 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6872
6873 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
6874 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
6875
6876 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
6877
 6878    /* Truncate the value to this variable's size. */
6879 switch (cbType)
6880 {
6881 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
6882 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
6883 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
6884 }
6885
6886 iemNativeVarRegisterRelease(pReNative, idxVarOther);
6887 iemNativeVarRegisterRelease(pReNative, idxVar);
6888 return idxVar;
6889}
6890
6891
6892/**
6893 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6894 * fixed till we call iemNativeVarRegisterRelease.
6895 *
6896 * @returns The host register number.
6897 * @param pReNative The recompiler state.
6898 * @param idxVar The variable.
6899 * @param poff Pointer to the instruction buffer offset.
6900 * In case a register needs to be freed up or the value
6901 * loaded off the stack.
6902 * @param fInitialized Set if the variable must already have been initialized.
6903 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6904 * the case.
6905 * @param idxRegPref Preferred register number or UINT8_MAX.
6906 */
6907DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6908 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6909{
6910 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6911 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6912 Assert(pVar->cbVar <= 8);
6913 Assert(!pVar->fRegAcquired);
6914
6915 uint8_t idxReg = pVar->idxReg;
6916 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6917 {
6918 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6919 && pVar->enmKind < kIemNativeVarKind_End);
6920 pVar->fRegAcquired = true;
6921 return idxReg;
6922 }
6923
6924 /*
6925 * If the kind of variable has not yet been set, default to 'stack'.
6926 */
6927 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6928 && pVar->enmKind < kIemNativeVarKind_End);
6929 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6930 iemNativeVarSetKindToStack(pReNative, idxVar);
6931
6932 /*
 6933     * We have to allocate a register for the variable, even if it's a stack one,
 6934     * as we don't know if modifications are being made to it before it's
 6935     * finalized (todo: analyze and insert hints about that?).
 6936     *
 6937     * If we can, we try to get the correct register for argument variables. This
 6938     * assumes that most argument variables are fetched as close as possible
 6939     * to the actual call, so that there aren't any interfering hidden calls
 6940     * (memory accesses, etc.) in between.
 6941     *
 6942     * If we cannot, or it's an ordinary (non-argument) variable, we make sure no
 6943     * argument registers that will be used by this MC block are allocated here,
 6944     * and we always prefer non-volatile registers to avoid needing to spill
 6945     * stuff for internal calls.
6946 */
 6947    /** @todo Detect too early argument value fetches and warn in the python
 6948     *        script about hidden calls causing less optimal code to be generated. */
6949
6950 uint8_t const uArgNo = pVar->uArgNo;
6951 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6952 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6953 {
6954 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6955
6956#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6957 /* Writeback any dirty shadow registers we are about to unshadow. */
6958 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
6959#endif
6960
6961 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6962 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6963 }
6964 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6965 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6966 {
6967 /** @todo there must be a better way for this and boot cArgsX? */
6968 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6969 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6970 & ~pReNative->Core.bmHstRegsWithGstShadow
6971 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6972 & fNotArgsMask;
6973 if (fRegs)
6974 {
6975 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6976 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6977 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6978 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6979 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6980 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6981 }
6982 else
6983 {
6984 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6985 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6986 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6987 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6988 }
6989 }
6990 else
6991 {
6992 idxReg = idxRegPref;
6993 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6994 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
6995 }
6996 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6997 pVar->idxReg = idxReg;
6998
6999#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7000 pVar->fSimdReg = false;
7001#endif
7002
7003 /*
7004 * Load it off the stack if we've got a stack slot.
7005 */
7006 uint8_t const idxStackSlot = pVar->idxStackSlot;
7007 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7008 {
7009 Assert(fInitialized);
7010 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7011 switch (pVar->cbVar)
7012 {
7013 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7014 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7015 case 3: AssertFailed(); RT_FALL_THRU();
7016 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7017 default: AssertFailed(); RT_FALL_THRU();
7018 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7019 }
7020 }
7021 else
7022 {
7023 Assert(idxStackSlot == UINT8_MAX);
7024 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7025 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7026 else
7027 {
7028 /*
7029 * Convert from immediate to stack/register. This is currently only
7030 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7031 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7032 */
7033 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7034 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7035 idxVar, idxReg, pVar->u.uValue));
7036 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7037 pVar->enmKind = kIemNativeVarKind_Stack;
7038 }
7039 }
7040
7041 pVar->fRegAcquired = true;
7042 return idxReg;
7043}
7044
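/*
 * Illustration only (standalone sketch, not used by the recompiler): the
 * register picking above takes the highest set bit of the free-and-unshadowed
 * mask, falling back to volatile registers only when no non-volatile one is
 * free:
 *
 *      uint32_t const fPick  = fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
 *                            ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs;
 *      uint8_t  const idxReg = (uint8_t)ASMBitLastSetU32(fPick) - 1;
 *
 * Picking from the top works out because both arm64 and amd64 have their block
 * of callee-saved (non-volatile) registers at the high end of the mask.
 */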
7045
7046#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7047/**
7048 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7049 * fixed till we call iemNativeVarRegisterRelease.
7050 *
7051 * @returns The host register number.
7052 * @param pReNative The recompiler state.
7053 * @param idxVar The variable.
7054 * @param poff Pointer to the instruction buffer offset.
7055 * In case a register needs to be freed up or the value
7056 * loaded off the stack.
7057 * @param fInitialized Set if the variable must already have been initialized.
7058 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7059 * the case.
7060 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7061 */
7062DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7063 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7064{
7065 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7066 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7067 Assert( pVar->cbVar == sizeof(RTUINT128U)
7068 || pVar->cbVar == sizeof(RTUINT256U));
7069 Assert(!pVar->fRegAcquired);
7070
7071 uint8_t idxReg = pVar->idxReg;
7072 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7073 {
7074 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7075 && pVar->enmKind < kIemNativeVarKind_End);
7076 pVar->fRegAcquired = true;
7077 return idxReg;
7078 }
7079
7080 /*
7081 * If the kind of variable has not yet been set, default to 'stack'.
7082 */
7083 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7084 && pVar->enmKind < kIemNativeVarKind_End);
7085 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7086 iemNativeVarSetKindToStack(pReNative, idxVar);
7087
7088 /*
 7089     * We have to allocate a register for the variable, even if it's a stack one,
 7090     * as we don't know if modifications are being made to it before it's
 7091     * finalized (todo: analyze and insert hints about that?).
 7092     *
 7093     * If we can, we try to get the correct register for argument variables. This
 7094     * assumes that most argument variables are fetched as close as possible
 7095     * to the actual call, so that there aren't any interfering hidden calls
 7096     * (memory accesses, etc.) in between.
 7097     *
 7098     * If we cannot, or it's an ordinary (non-argument) variable, we make sure no
 7099     * argument registers that will be used by this MC block are allocated here,
 7100     * and we always prefer non-volatile registers to avoid needing to spill
 7101     * stuff for internal calls.
7102 */
 7103    /** @todo Detect too early argument value fetches and warn in the python
 7104     *        script about hidden calls causing less optimal code to be generated. */
7105
7106 uint8_t const uArgNo = pVar->uArgNo;
7107 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7108
 7109    /* SIMD is a bit simpler for now because there is no support for arguments. */
7110 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7111 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7112 {
7113 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7114 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7115 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7116 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7117 & fNotArgsMask;
7118 if (fRegs)
7119 {
7120 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7121 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7122 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7123 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7124 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7125 }
7126 else
7127 {
7128 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7129 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7130 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7131 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7132 }
7133 }
7134 else
7135 {
7136 idxReg = idxRegPref;
7137 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7138 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7139 }
7140 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7141
7142 pVar->fSimdReg = true;
7143 pVar->idxReg = idxReg;
7144
7145 /*
7146 * Load it off the stack if we've got a stack slot.
7147 */
7148 uint8_t const idxStackSlot = pVar->idxStackSlot;
7149 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7150 {
7151 Assert(fInitialized);
7152 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7153 switch (pVar->cbVar)
7154 {
7155 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7156 default: AssertFailed(); RT_FALL_THRU();
7157 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7158 }
7159 }
7160 else
7161 {
7162 Assert(idxStackSlot == UINT8_MAX);
7163 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7164 }
7165 pVar->fRegAcquired = true;
7166 return idxReg;
7167}
7168#endif
7169
7170
7171/**
7172 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7173 * guest register.
7174 *
7175 * This function makes sure there is a register for it and sets it to be the
7176 * current shadow copy of @a enmGstReg.
7177 *
7178 * @returns The host register number.
7179 * @param pReNative The recompiler state.
7180 * @param idxVar The variable.
7181 * @param enmGstReg The guest register this variable will be written to
7182 * after this call.
7183 * @param poff Pointer to the instruction buffer offset.
7184 * In case a register needs to be freed up or if the
7185 * variable content needs to be loaded off the stack.
7186 *
7187 * @note We DO NOT expect @a idxVar to be an argument variable,
 7188 *       because we can only be in the commit stage of an instruction when this
7189 * function is used.
7190 */
7191DECL_HIDDEN_THROW(uint8_t)
7192iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7193{
7194 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7195 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7196 Assert(!pVar->fRegAcquired);
7197 AssertMsgStmt( pVar->cbVar <= 8
7198 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7199 || pVar->enmKind == kIemNativeVarKind_Stack),
7200 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7201 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7202 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7203
7204 /*
7205 * This shouldn't ever be used for arguments, unless it's in a weird else
7206 * branch that doesn't do any calling and even then it's questionable.
7207 *
7208 * However, in case someone writes crazy wrong MC code and does register
7209 * updates before making calls, just use the regular register allocator to
7210 * ensure we get a register suitable for the intended argument number.
7211 */
7212 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7213
7214 /*
7215 * If there is already a register for the variable, we transfer/set the
7216 * guest shadow copy assignment to it.
7217 */
7218 uint8_t idxReg = pVar->idxReg;
7219 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7220 {
7221#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7222 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7223 {
7224# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7225 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7226 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7227# endif
7228 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7229 }
7230#endif
7231
7232 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7233 {
7234 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7235 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7236 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7237 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7238 }
7239 else
7240 {
7241 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7242 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7243 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7244 }
7245 /** @todo figure this one out. We need some way of making sure the register isn't
7246 * modified after this point, just in case we start writing crappy MC code. */
7247 pVar->enmGstReg = enmGstReg;
7248 pVar->fRegAcquired = true;
7249 return idxReg;
7250 }
7251 Assert(pVar->uArgNo == UINT8_MAX);
7252
7253 /*
 7254     * Because this is supposed to be the commit stage, we just tag along with the
7255 * temporary register allocator and upgrade it to a variable register.
7256 */
7257 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7258 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7259 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7260 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7261 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7262 pVar->idxReg = idxReg;
7263
7264 /*
7265 * Now we need to load the register value.
7266 */
7267 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7268 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7269 else
7270 {
7271 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7272 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7273 switch (pVar->cbVar)
7274 {
7275 case sizeof(uint64_t):
7276 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7277 break;
7278 case sizeof(uint32_t):
7279 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7280 break;
7281 case sizeof(uint16_t):
7282 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7283 break;
7284 case sizeof(uint8_t):
7285 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7286 break;
7287 default:
7288 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7289 }
7290 }
7291
7292 pVar->fRegAcquired = true;
7293 return idxReg;
7294}
7295
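/*
 * Usage sketch (hypothetical, for illustration only; the emitter used for the
 * actual store is immaterial here).  Assuming the GPR entries follow
 * kIemNativeGstReg_GprFirst in x86 register order, committing a variable to
 * guest RAX would go roughly like this:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                    (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX), &off);
 *      // ... emit the store to CPUMCTX here, or leave it to delayed writeback ...
 *      iemNativeVarRegisterRelease(pReNative, idxValueVar);
 *
 * Afterwards the host register shadows the guest RAX value, so subsequent code
 * can reuse it without reloading.
 */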
7296
7297/**
7298 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7299 *
7300 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7301 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7302 * requirement of flushing anything in volatile host registers when making a
7303 * call.
7304 *
7305 * @returns New @a off value.
7306 * @param pReNative The recompiler state.
7307 * @param off The code buffer position.
7308 * @param fHstRegsNotToSave Set of registers not to save & restore.
7309 */
7310DECL_HIDDEN_THROW(uint32_t)
7311iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7312{
7313 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7314 if (fHstRegs)
7315 {
7316 do
7317 {
7318 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7319 fHstRegs &= ~RT_BIT_32(idxHstReg);
7320
7321 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7322 {
7323 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7324 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7325 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7326 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7327 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7328 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7329 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7330 {
7331 case kIemNativeVarKind_Stack:
7332 {
7333 /* Temporarily spill the variable register. */
7334 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7335 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7336 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7337 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7338 continue;
7339 }
7340
7341 case kIemNativeVarKind_Immediate:
7342 case kIemNativeVarKind_VarRef:
7343 case kIemNativeVarKind_GstRegRef:
7344 /* It is weird to have any of these loaded at this point. */
7345 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7346 continue;
7347
7348 case kIemNativeVarKind_End:
7349 case kIemNativeVarKind_Invalid:
7350 break;
7351 }
7352 AssertFailed();
7353 }
7354 else
7355 {
7356 /*
7357 * Allocate a temporary stack slot and spill the register to it.
7358 */
7359 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7360 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7361 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7362 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7363 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7364 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7365 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7366 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7367 }
7368 } while (fHstRegs);
7369 }
7370#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7371
7372 /*
 7373     * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
 7374     * which would be more difficult due to them spanning multiple stack slots and having different sizes
 7375     * (besides, we only have a limited number of slots at the moment).
 7376     *
 7377     * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
 7378     * the callee. This asserts that the registers were written back earlier and are not in a dirty state.
7379 */
7380 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7381
7382 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7383 if (fHstRegs)
7384 {
7385 do
7386 {
7387 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7388 fHstRegs &= ~RT_BIT_32(idxHstReg);
7389
7390 /* Fixed reserved and temporary registers don't need saving. */
7391 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7392 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7393 continue;
7394
7395 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7396
7397 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7398 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7399 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7400 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7401 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7402 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7403 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7404 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7405 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7406 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7407 {
7408 case kIemNativeVarKind_Stack:
7409 {
7410 /* Temporarily spill the variable register. */
7411 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7412 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7413 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7414 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7415 if (cbVar == sizeof(RTUINT128U))
7416 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7417 else
7418 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7419 continue;
7420 }
7421
7422 case kIemNativeVarKind_Immediate:
7423 case kIemNativeVarKind_VarRef:
7424 case kIemNativeVarKind_GstRegRef:
7425 /* It is weird to have any of these loaded at this point. */
7426 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7427 continue;
7428
7429 case kIemNativeVarKind_End:
7430 case kIemNativeVarKind_Invalid:
7431 break;
7432 }
7433 AssertFailed();
7434 } while (fHstRegs);
7435 }
7436#endif
7437 return off;
7438}
7439
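/*
 * Usage sketch (illustrative only): a TLB-miss style helper call is expected
 * to bracket the actual call emission with the save/restore pair:
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      // ... load the helper arguments and emit the call itself here ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 *
 * optionally followed by iemNativeRegRestoreGuestShadowsInVolatileRegs() when
 * guest shadows living in volatile registers need to be re-established as well.
 */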
7440
7441/**
 7442 * Emit code to restore volatile registers after a call to a helper.
7443 *
7444 * @returns New @a off value.
7445 * @param pReNative The recompiler state.
7446 * @param off The code buffer position.
7447 * @param fHstRegsNotToSave Set of registers not to save & restore.
7448 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7449 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7450 */
7451DECL_HIDDEN_THROW(uint32_t)
7452iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7453{
7454 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7455 if (fHstRegs)
7456 {
7457 do
7458 {
7459 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7460 fHstRegs &= ~RT_BIT_32(idxHstReg);
7461
7462 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7463 {
7464 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7465 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7466 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7467 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7468 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7469 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7470 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7471 {
7472 case kIemNativeVarKind_Stack:
7473 {
7474 /* Unspill the variable register. */
7475 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7476 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7477 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7478 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7479 continue;
7480 }
7481
7482 case kIemNativeVarKind_Immediate:
7483 case kIemNativeVarKind_VarRef:
7484 case kIemNativeVarKind_GstRegRef:
7485 /* It is weird to have any of these loaded at this point. */
7486 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7487 continue;
7488
7489 case kIemNativeVarKind_End:
7490 case kIemNativeVarKind_Invalid:
7491 break;
7492 }
7493 AssertFailed();
7494 }
7495 else
7496 {
7497 /*
7498 * Restore from temporary stack slot.
7499 */
7500 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7501 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7502 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7503 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7504
7505 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7506 }
7507 } while (fHstRegs);
7508 }
7509#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7510 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7511 if (fHstRegs)
7512 {
7513 do
7514 {
7515 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7516 fHstRegs &= ~RT_BIT_32(idxHstReg);
7517
7518 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7519 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7520 continue;
7521 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7522
7523 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7525 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7526 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7527 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7528 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7529 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7530 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7531 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7532 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7533 {
7534 case kIemNativeVarKind_Stack:
7535 {
7536 /* Unspill the variable register. */
7537 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7538 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7539 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7540 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7541
7542 if (cbVar == sizeof(RTUINT128U))
7543 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7544 else
7545 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7546 continue;
7547 }
7548
7549 case kIemNativeVarKind_Immediate:
7550 case kIemNativeVarKind_VarRef:
7551 case kIemNativeVarKind_GstRegRef:
7552 /* It is weird to have any of these loaded at this point. */
7553 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7554 continue;
7555
7556 case kIemNativeVarKind_End:
7557 case kIemNativeVarKind_Invalid:
7558 break;
7559 }
7560 AssertFailed();
7561 } while (fHstRegs);
7562 }
7563#endif
7564 return off;
7565}
7566
7567
7568/**
 7569 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
7570 *
7571 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7572 *
7573 * ASSUMES that @a idxVar is valid and unpacked.
7574 */
7575DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7576{
7577 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7578 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7579 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7580 {
7581 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7582 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7583 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7584 Assert(cSlots > 0);
7585 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7586 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7587 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7588 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7589 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7590 }
7591 else
7592 Assert(idxStackSlot == UINT8_MAX);
7593}
7594
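/*
 * Worked example for the slot math above (illustrative): a variable with
 * cbVar == sizeof(RTUINT256U) (32 bytes) occupies cSlots = (32 + 8 - 1) / 8 = 4
 * 64-bit slots, giving fAllocMask = RT_BIT_32(4) - 1 = 0xf, so freeing it
 * clears four consecutive bits starting at idxStackSlot in Core.bmStack.  A
 * plain uint32_t variable needs just one slot and clears a single bit.
 */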
7595
7596/**
7597 * Worker that frees a single variable.
7598 *
7599 * ASSUMES that @a idxVar is valid and unpacked.
7600 */
7601DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7602{
7603 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7604 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7605 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7606
7607 /* Free the host register first if any assigned. */
7608 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7609#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7610 if ( idxHstReg != UINT8_MAX
7611 && pReNative->Core.aVars[idxVar].fSimdReg)
7612 {
7613 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7614 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7615 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7616 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7617 }
7618 else
7619#endif
7620 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7621 {
7622 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7623 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7624 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7625 }
7626
7627 /* Free argument mapping. */
7628 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7629 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7630 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7631
7632 /* Free the stack slots. */
7633 iemNativeVarFreeStackSlots(pReNative, idxVar);
7634
7635 /* Free the actual variable. */
7636 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7637 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7638}
7639
7640
7641/**
7642 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7643 */
7644DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7645{
7646 while (bmVars != 0)
7647 {
7648 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7649 bmVars &= ~RT_BIT_32(idxVar);
7650
7651#if 1 /** @todo optimize by simplifying this later... */
7652 iemNativeVarFreeOneWorker(pReNative, idxVar);
7653#else
7654 /* Only need to free the host register, the rest is done as bulk updates below. */
7655 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7656 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7657 {
7658 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7659 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7660 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7661 }
7662#endif
7663 }
7664#if 0 /** @todo optimize by simplifying this later... */
7665 pReNative->Core.bmVars = 0;
7666 pReNative->Core.bmStack = 0;
7667 pReNative->Core.u64ArgVars = UINT64_MAX;
7668#endif
7669}
7670
7671
7672
7673/*********************************************************************************************************************************
7674* Emitters for IEM_MC_CALL_CIMPL_XXX *
7675*********************************************************************************************************************************/
7676
7677/**
7678 * Emits code to load a reference to the given guest register into @a idxGprDst.
7679 */
7680DECL_HIDDEN_THROW(uint32_t)
7681iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7682 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7683{
7684#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
 7685    /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
7686#endif
7687
7688 /*
7689 * Get the offset relative to the CPUMCTX structure.
7690 */
7691 uint32_t offCpumCtx;
7692 switch (enmClass)
7693 {
7694 case kIemNativeGstRegRef_Gpr:
7695 Assert(idxRegInClass < 16);
7696 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7697 break;
7698
 7699        case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
7700 Assert(idxRegInClass < 4);
7701 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7702 break;
7703
7704 case kIemNativeGstRegRef_EFlags:
7705 Assert(idxRegInClass == 0);
7706 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7707 break;
7708
7709 case kIemNativeGstRegRef_MxCsr:
7710 Assert(idxRegInClass == 0);
7711 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7712 break;
7713
7714 case kIemNativeGstRegRef_FpuReg:
7715 Assert(idxRegInClass < 8);
7716 AssertFailed(); /** @todo what kind of indexing? */
7717 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7718 break;
7719
7720 case kIemNativeGstRegRef_MReg:
7721 Assert(idxRegInClass < 8);
7722 AssertFailed(); /** @todo what kind of indexing? */
7723 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7724 break;
7725
7726 case kIemNativeGstRegRef_XReg:
7727 Assert(idxRegInClass < 16);
7728 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7729 break;
7730
7731 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7732 Assert(idxRegInClass == 0);
7733 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7734 break;
7735
7736 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7737 Assert(idxRegInClass == 0);
7738 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7739 break;
7740
7741 default:
7742 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7743 }
7744
7745 /*
7746 * Load the value into the destination register.
7747 */
7748#ifdef RT_ARCH_AMD64
7749 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7750
7751#elif defined(RT_ARCH_ARM64)
7752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7753 Assert(offCpumCtx < 4096);
7754 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7755
7756#else
7757# error "Port me!"
7758#endif
7759
7760 return off;
7761}
7762
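/*
 * Conceptually (illustrative, not emitted verbatim) the code generated above
 * amounts to taking the address of the referenced CPUMCTX member:
 *
 *      amd64:  lea  <idxGprDst>, [<pVCpu reg> + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx]
 *      arm64:  add  <idxGprDst>, IEMNATIVE_REG_FIXED_PCPUMCTX, #offCpumCtx
 *
 * i.e. a host pointer to the guest register that a C helper can dereference
 * directly.
 */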
7763
7764/**
7765 * Common code for CIMPL and AIMPL calls.
7766 *
 7767 * These are calls that use argument variables and such. They should not be
7768 * confused with internal calls required to implement an MC operation,
7769 * like a TLB load and similar.
7770 *
7771 * Upon return all that is left to do is to load any hidden arguments and
7772 * perform the call. All argument variables are freed.
7773 *
7774 * @returns New code buffer offset; throws VBox status code on error.
7775 * @param pReNative The native recompile state.
7776 * @param off The code buffer offset.
 7777 * @param   cArgs               The total number of arguments (includes hidden
7778 * count).
7779 * @param cHiddenArgs The number of hidden arguments. The hidden
7780 * arguments must not have any variable declared for
7781 * them, whereas all the regular arguments must
7782 * (tstIEMCheckMc ensures this).
 7783 * @param   fFlushPendingWrites Flag whether to flush pending writes (default true);
 7784 *                              pending writes in call-volatile registers are still flushed even when false.
7785 */
7786DECL_HIDDEN_THROW(uint32_t)
7787iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
7788 bool fFlushPendingWrites /*= true*/)
7789{
7790#ifdef VBOX_STRICT
7791 /*
7792 * Assert sanity.
7793 */
7794 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7795 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7796 for (unsigned i = 0; i < cHiddenArgs; i++)
7797 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7798 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7799 {
7800 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7801 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7802 }
7803 iemNativeRegAssertSanity(pReNative);
7804#endif
7805
7806 /* We don't know what the called function makes use of, so flush any pending register writes. */
7807 RT_NOREF(fFlushPendingWrites);
7808#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7809 if (fFlushPendingWrites)
7810#endif
7811 off = iemNativeRegFlushPendingWrites(pReNative, off);
7812
7813 /*
7814 * Before we do anything else, go over variables that are referenced and
7815 * make sure they are not in a register.
7816 */
7817 uint32_t bmVars = pReNative->Core.bmVars;
7818 if (bmVars)
7819 {
7820 do
7821 {
7822 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7823 bmVars &= ~RT_BIT_32(idxVar);
7824
7825 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7826 {
7827 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7828#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7829 if ( idxRegOld != UINT8_MAX
7830 && pReNative->Core.aVars[idxVar].fSimdReg)
7831 {
7832 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7833 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
7834
7835 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7836 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7837 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7838 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7839 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
7840 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7841 else
7842 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7843
7844 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
7845 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
7846
7847 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7848 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
7849 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7850 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
7851 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
7852 }
7853 else
7854#endif
7855 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7856 {
7857 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7858 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7859 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7860 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7861 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7862
7863 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7864 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7865 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7866 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7867 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7868 }
7869 }
7870 } while (bmVars != 0);
7871#if 0 //def VBOX_STRICT
7872 iemNativeRegAssertSanity(pReNative);
7873#endif
7874 }
7875
7876 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7877
7878#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7879 /*
 7880     * As the very first step, go over the host registers that will be used for arguments
 7881     * and make sure they don't shadow anything which needs writing back first.
7882 */
7883 for (uint32_t i = 0; i < cRegArgs; i++)
7884 {
7885 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7886
7887 /* Writeback any dirty guest shadows before using this register. */
7888 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
7889 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
7890 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
7891 }
7892#endif
7893
7894 /*
7895 * First, go over the host registers that will be used for arguments and make
7896 * sure they either hold the desired argument or are free.
7897 */
7898 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7899 {
7900 for (uint32_t i = 0; i < cRegArgs; i++)
7901 {
7902 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7903 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7904 {
7905 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7906 {
7907 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7909 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7910 Assert(pVar->idxReg == idxArgReg);
7911 uint8_t const uArgNo = pVar->uArgNo;
7912 if (uArgNo == i)
 7913                     { /* perfect */ }
7914 /* The variable allocator logic should make sure this is impossible,
7915 except for when the return register is used as a parameter (ARM,
7916 but not x86). */
7917#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7918 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7919 {
7920# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7921# error "Implement this"
7922# endif
7923 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7924 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7925 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7926 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7927 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7928 }
7929#endif
7930 else
7931 {
7932 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7933
7934 if (pVar->enmKind == kIemNativeVarKind_Stack)
7935 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7936 else
7937 {
7938 /* just free it, can be reloaded if used again */
7939 pVar->idxReg = UINT8_MAX;
7940 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7941 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7942 }
7943 }
7944 }
7945 else
7946 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7947 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7948 }
7949 }
7950#if 0 //def VBOX_STRICT
7951 iemNativeRegAssertSanity(pReNative);
7952#endif
7953 }
7954
7955 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7956
7957#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7958 /*
7959 * If there are any stack arguments, make sure they are in their place as well.
7960 *
 7961     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
 7962     * the caller) will be loading it later and it must be free (see first loop).
7963 */
7964 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7965 {
7966 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7967 {
7968 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7969 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7970 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7971 {
7972 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7973 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7974 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7975 pVar->idxReg = UINT8_MAX;
7976 }
7977 else
7978 {
7979 /* Use ARG0 as temp for stuff we need registers for. */
7980 switch (pVar->enmKind)
7981 {
7982 case kIemNativeVarKind_Stack:
7983 {
7984 uint8_t const idxStackSlot = pVar->idxStackSlot;
7985 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7986 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7987 iemNativeStackCalcBpDisp(idxStackSlot));
7988 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7989 continue;
7990 }
7991
7992 case kIemNativeVarKind_Immediate:
7993 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7994 continue;
7995
7996 case kIemNativeVarKind_VarRef:
7997 {
7998 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7999 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8000 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8001 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8002 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8003# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8004 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8005 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8006 if ( fSimdReg
8007 && idxRegOther != UINT8_MAX)
8008 {
8009 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8010 if (cbVar == sizeof(RTUINT128U))
8011 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8012 else
8013 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8014 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8015 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8016 }
8017 else
8018# endif
8019 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8020 {
8021 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8022 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8023 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8024 }
8025 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8026 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8027 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8028 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8029 continue;
8030 }
8031
8032 case kIemNativeVarKind_GstRegRef:
8033 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8034 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8035 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8036 continue;
8037
8038 case kIemNativeVarKind_Invalid:
8039 case kIemNativeVarKind_End:
8040 break;
8041 }
8042 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8043 }
8044 }
8045# if 0 //def VBOX_STRICT
8046 iemNativeRegAssertSanity(pReNative);
8047# endif
8048 }
8049#else
8050 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8051#endif
8052
8053 /*
8054 * Make sure the argument variables are loaded into their respective registers.
8055 *
8056 * We can optimize this by ASSUMING that any register allocations are for
 8057     * registers that have already been loaded and are ready. The previous step
8058 * saw to that.
8059 */
8060 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8061 {
8062 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8063 {
8064 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8065 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8066 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8067 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8068 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8069 else
8070 {
8071 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8072 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8073 {
8074 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8075 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8076 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8077 | RT_BIT_32(idxArgReg);
8078 pVar->idxReg = idxArgReg;
8079 }
8080 else
8081 {
8082 /* Use ARG0 as temp for stuff we need registers for. */
8083 switch (pVar->enmKind)
8084 {
8085 case kIemNativeVarKind_Stack:
8086 {
8087 uint8_t const idxStackSlot = pVar->idxStackSlot;
8088 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8089 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8090 continue;
8091 }
8092
8093 case kIemNativeVarKind_Immediate:
8094 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8095 continue;
8096
8097 case kIemNativeVarKind_VarRef:
8098 {
8099 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8100 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8101 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8102 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8103 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8104 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8105#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8106 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8107 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8108 if ( fSimdReg
8109 && idxRegOther != UINT8_MAX)
8110 {
8111 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8112 if (cbVar == sizeof(RTUINT128U))
8113 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8114 else
8115 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8116 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8117 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8118 }
8119 else
8120#endif
8121 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8122 {
8123 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8124 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8125 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8126 }
8127 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8128 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8129 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8130 continue;
8131 }
8132
8133 case kIemNativeVarKind_GstRegRef:
8134 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8135 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8136 continue;
8137
8138 case kIemNativeVarKind_Invalid:
8139 case kIemNativeVarKind_End:
8140 break;
8141 }
8142 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8143 }
8144 }
8145 }
8146#if 0 //def VBOX_STRICT
8147 iemNativeRegAssertSanity(pReNative);
8148#endif
8149 }
8150#ifdef VBOX_STRICT
8151 else
8152 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8153 {
8154 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8155 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8156 }
8157#endif
8158
8159 /*
8160 * Free all argument variables (simplified).
8161 * Their lifetime always expires with the call they are for.
8162 */
8163 /** @todo Make the python script check that arguments aren't used after
8164 * IEM_MC_CALL_XXXX. */
8165 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8166 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8167 * an argument value. There is also some FPU stuff. */
8168 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8169 {
8170 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8171 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8172
8173 /* no need to free registers: */
8174 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8175 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8176 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8177 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8178 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8179 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8180
8181 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8182 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8183 iemNativeVarFreeStackSlots(pReNative, idxVar);
8184 }
8185 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8186
8187 /*
8188 * Flush volatile registers as we make the call.
8189 */
8190 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8191
8192 return off;
8193}
8194
8195
8196
8197/*********************************************************************************************************************************
8198* TLB Lookup. *
8199*********************************************************************************************************************************/
8200
8201/**
8202 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8203 */
8204DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8205{
8206 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8207 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8208 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8209 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8210
8211 /* Do the lookup manually. */
8212 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8213 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8214 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8215 if (RT_LIKELY(pTlbe->uTag == uTag))
8216 {
8217 /*
8218 * Check TLB page table level access flags.
8219 */
8220 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8221 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8222 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8223 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8224 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8225 | IEMTLBE_F_PG_UNASSIGNED
8226 | IEMTLBE_F_PT_NO_ACCESSED
8227 | fNoWriteNoDirty | fNoUser);
8228 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8229 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8230 {
8231 /*
8232 * Return the address.
8233 */
8234 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8235 if ((uintptr_t)pbAddr == uResult)
8236 return;
8237 RT_NOREF(cbMem);
8238 AssertFailed();
8239 }
8240 else
8241 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8242 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8243 }
8244 else
8245 AssertFailed();
8246 RT_BREAKPOINT();
8247}
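/* Illustrative sketch only: going by the RT_BYTE1/RT_BYTE2 unpacking above, the
   uSegAndSizeAndAccess argument is presumably packed by the emitter along these lines
   (the real emitter-side code is not in this file):
        uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg          // byte 0: segment register index, UINT8_MAX = flat
                                            | ((uint32_t)cbMem   <<  8)  // byte 1: access size in bytes
                                            | ((uint32_t)fAccess << 16); // bits 16+: IEM_ACCESS_XXX flags
 */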
8248
8249/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8250
8251
8252
8253/*********************************************************************************************************************************
8254* Recompiler Core. *
8255*********************************************************************************************************************************/
8256
8257/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8258static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8259{
8260 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8261 pDis->cbCachedInstr += cbMaxRead;
8262 RT_NOREF(cbMinRead);
8263 return VERR_NO_DATA;
8264}
8265
8266
8267DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8268{
8269 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8270 {
8271#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8272 ENTRY(fLocalForcedActions),
8273 ENTRY(iem.s.rcPassUp),
8274 ENTRY(iem.s.fExec),
8275 ENTRY(iem.s.pbInstrBuf),
8276 ENTRY(iem.s.uInstrBufPc),
8277 ENTRY(iem.s.GCPhysInstrBuf),
8278 ENTRY(iem.s.cbInstrBufTotal),
8279 ENTRY(iem.s.idxTbCurInstr),
8280 ENTRY(iem.s.fSkippingEFlags),
8281#ifdef VBOX_WITH_STATISTICS
8282 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8283 ENTRY(iem.s.StatNativeTlbHitsForStore),
8284 ENTRY(iem.s.StatNativeTlbHitsForStack),
8285 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8286 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8287 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8288 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8289 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8290#endif
8291 ENTRY(iem.s.DataTlb.uTlbRevision),
8292 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8293 ENTRY(iem.s.DataTlb.cTlbHits),
8294 ENTRY(iem.s.DataTlb.aEntries),
8295 ENTRY(iem.s.CodeTlb.uTlbRevision),
8296 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8297 ENTRY(iem.s.CodeTlb.cTlbHits),
8298 ENTRY(iem.s.CodeTlb.aEntries),
8299 ENTRY(pVMR3),
8300 ENTRY(cpum.GstCtx.rax),
8301 ENTRY(cpum.GstCtx.ah),
8302 ENTRY(cpum.GstCtx.rcx),
8303 ENTRY(cpum.GstCtx.ch),
8304 ENTRY(cpum.GstCtx.rdx),
8305 ENTRY(cpum.GstCtx.dh),
8306 ENTRY(cpum.GstCtx.rbx),
8307 ENTRY(cpum.GstCtx.bh),
8308 ENTRY(cpum.GstCtx.rsp),
8309 ENTRY(cpum.GstCtx.rbp),
8310 ENTRY(cpum.GstCtx.rsi),
8311 ENTRY(cpum.GstCtx.rdi),
8312 ENTRY(cpum.GstCtx.r8),
8313 ENTRY(cpum.GstCtx.r9),
8314 ENTRY(cpum.GstCtx.r10),
8315 ENTRY(cpum.GstCtx.r11),
8316 ENTRY(cpum.GstCtx.r12),
8317 ENTRY(cpum.GstCtx.r13),
8318 ENTRY(cpum.GstCtx.r14),
8319 ENTRY(cpum.GstCtx.r15),
8320 ENTRY(cpum.GstCtx.es.Sel),
8321 ENTRY(cpum.GstCtx.es.u64Base),
8322 ENTRY(cpum.GstCtx.es.u32Limit),
8323 ENTRY(cpum.GstCtx.es.Attr),
8324 ENTRY(cpum.GstCtx.cs.Sel),
8325 ENTRY(cpum.GstCtx.cs.u64Base),
8326 ENTRY(cpum.GstCtx.cs.u32Limit),
8327 ENTRY(cpum.GstCtx.cs.Attr),
8328 ENTRY(cpum.GstCtx.ss.Sel),
8329 ENTRY(cpum.GstCtx.ss.u64Base),
8330 ENTRY(cpum.GstCtx.ss.u32Limit),
8331 ENTRY(cpum.GstCtx.ss.Attr),
8332 ENTRY(cpum.GstCtx.ds.Sel),
8333 ENTRY(cpum.GstCtx.ds.u64Base),
8334 ENTRY(cpum.GstCtx.ds.u32Limit),
8335 ENTRY(cpum.GstCtx.ds.Attr),
8336 ENTRY(cpum.GstCtx.fs.Sel),
8337 ENTRY(cpum.GstCtx.fs.u64Base),
8338 ENTRY(cpum.GstCtx.fs.u32Limit),
8339 ENTRY(cpum.GstCtx.fs.Attr),
8340 ENTRY(cpum.GstCtx.gs.Sel),
8341 ENTRY(cpum.GstCtx.gs.u64Base),
8342 ENTRY(cpum.GstCtx.gs.u32Limit),
8343 ENTRY(cpum.GstCtx.gs.Attr),
8344 ENTRY(cpum.GstCtx.rip),
8345 ENTRY(cpum.GstCtx.eflags),
8346 ENTRY(cpum.GstCtx.uRipInhibitInt),
8347 ENTRY(cpum.GstCtx.cr0),
8348 ENTRY(cpum.GstCtx.cr4),
8349 ENTRY(cpum.GstCtx.aXcr[0]),
8350 ENTRY(cpum.GstCtx.aXcr[1]),
8351#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8352 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8353 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8354 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8355 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8356 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8357 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8358 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8359 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8360 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8361 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8362 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8363 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8364 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8365 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8366 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8367 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8368 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8369 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8370 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8371 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8372 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8373 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8374 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8375 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8376 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8377 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8378 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8379 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8380 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8381 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8382 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8383 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8384 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8385#endif
8386#undef ENTRY
8387 };
8388#ifdef VBOX_STRICT
8389 static bool s_fOrderChecked = false;
8390 if (!s_fOrderChecked)
8391 {
8392 s_fOrderChecked = true;
8393 uint32_t offPrev = s_aMembers[0].off;
8394 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8395 {
8396 Assert(s_aMembers[i].off > offPrev);
8397 offPrev = s_aMembers[i].off;
8398 }
8399 }
8400#endif
8401
8402 /*
8403 * Binary lookup.
8404 */
8405 unsigned iStart = 0;
8406 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8407 for (;;)
8408 {
8409 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8410 uint32_t const offCur = s_aMembers[iCur].off;
8411 if (off < offCur)
8412 {
8413 if (iCur != iStart)
8414 iEnd = iCur;
8415 else
8416 break;
8417 }
8418 else if (off > offCur)
8419 {
8420 if (iCur + 1 < iEnd)
8421 iStart = iCur + 1;
8422 else
8423 break;
8424 }
8425 else
8426 return s_aMembers[iCur].pszName;
8427 }
8428#ifdef VBOX_WITH_STATISTICS
8429 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8430 return "iem.s.acThreadedFuncStats[iFn]";
8431#endif
8432 return NULL;
8433}
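/* Usage sketch (illustrative, not part of the build): the lookup only matches exact
   member offsets; anything in between yields NULL, apart from the acThreadedFuncStats
   range handled above.
        const char *psz1 = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));     // -> "cpum.GstCtx.rip"
        const char *psz2 = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip) + 1); // -> NULL
   The disassembler below uses this to annotate VMCPU-relative memory operands. */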
8434
8435
8436DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8437{
8438 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8439#if defined(RT_ARCH_AMD64)
8440 static const char * const a_apszMarkers[] =
8441 {
8442 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8443 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8444 };
8445#endif
8446
8447 char szDisBuf[512];
8448 DISSTATE Dis;
8449 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8450 uint32_t const cNative = pTb->Native.cInstructions;
8451 uint32_t offNative = 0;
8452#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8453 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8454#endif
8455 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8456 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8457 : DISCPUMODE_64BIT;
8458#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8459 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8460#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8461 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8462#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8463# error "Port me"
8464#else
8465 csh hDisasm = ~(size_t)0;
8466# if defined(RT_ARCH_AMD64)
8467 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8468# elif defined(RT_ARCH_ARM64)
8469 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8470# else
8471# error "Port me"
8472# endif
8473 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8474
8475 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8476 //Assert(rcCs == CS_ERR_OK);
8477#endif
8478
8479 /*
8480 * Print TB info.
8481 */
8482 pHlp->pfnPrintf(pHlp,
8483 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8484 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8485 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8486 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8487#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8488 if (pDbgInfo && pDbgInfo->cEntries > 1)
8489 {
8490 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8491
8492 /*
8493 * This disassembly is driven by the debug info which follows the native
8494 * code and indicates where the next guest instruction starts, where
8495 * labels are, and other such things.
8496 */
8497 uint32_t idxThreadedCall = 0;
8498 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8499 uint8_t idxRange = UINT8_MAX;
8500 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8501 uint32_t offRange = 0;
8502 uint32_t offOpcodes = 0;
8503 uint32_t const cbOpcodes = pTb->cbOpcodes;
8504 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8505 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8506 uint32_t iDbgEntry = 1;
8507 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8508
8509 while (offNative < cNative)
8510 {
8511 /* If we're at or have passed the point where the next chunk of debug
8512 info starts, process it. */
8513 if (offDbgNativeNext <= offNative)
8514 {
8515 offDbgNativeNext = UINT32_MAX;
8516 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8517 {
8518 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8519 {
8520 case kIemTbDbgEntryType_GuestInstruction:
8521 {
8522 /* Did the exec flag change? */
8523 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8524 {
8525 pHlp->pfnPrintf(pHlp,
8526 " fExec change %#08x -> %#08x %s\n",
8527 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8528 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8529 szDisBuf, sizeof(szDisBuf)));
8530 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8531 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8532 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8533 : DISCPUMODE_64BIT;
8534 }
8535
8536 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8537 where the compilation was aborted before the opcode was recorded and the actual
8538 instruction was translated to a threaded call. This may happen when we run out
8539 of ranges, or when some complicated interrupts/FFs are found to be pending or
8540 similar. So, we just deal with it here rather than in the compiler code as it
8541 is a lot simpler to do here. */
8542 if ( idxRange == UINT8_MAX
8543 || idxRange >= cRanges
8544 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8545 {
8546 idxRange += 1;
8547 if (idxRange < cRanges)
8548 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8549 else
8550 continue;
8551 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8552 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8553 + (pTb->aRanges[idxRange].idxPhysPage == 0
8554 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8555 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8556 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8557 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8558 pTb->aRanges[idxRange].idxPhysPage);
8559 GCPhysPc += offRange;
8560 }
8561
8562 /* Disassemble the instruction. */
8563 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8564 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8565 uint32_t cbInstr = 1;
8566 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8567 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8568 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8569 if (RT_SUCCESS(rc))
8570 {
8571 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8572 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8573 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8574 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8575
8576 static unsigned const s_offMarker = 55;
8577 static char const s_szMarker[] = " ; <--- guest";
8578 if (cch < s_offMarker)
8579 {
8580 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8581 cch = s_offMarker;
8582 }
8583 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8584 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8585
8586 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8587 }
8588 else
8589 {
8590 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8591 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8592 cbInstr = 1;
8593 }
8594 GCPhysPc += cbInstr;
8595 offOpcodes += cbInstr;
8596 offRange += cbInstr;
8597 continue;
8598 }
8599
8600 case kIemTbDbgEntryType_ThreadedCall:
8601 pHlp->pfnPrintf(pHlp,
8602 " Call #%u to %s (%u args) - %s\n",
8603 idxThreadedCall,
8604 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8605 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8606 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8607 idxThreadedCall++;
8608 continue;
8609
8610 case kIemTbDbgEntryType_GuestRegShadowing:
8611 {
8612 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8613 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8614 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8615 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8616 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8617 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8618 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8619 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8620 else
8621 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8622 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8623 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8624 continue;
8625 }
8626
8627#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8628 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8629 {
8630 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8631 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8632 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8633 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8634 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8635 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8636 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8637 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8638 else
8639 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8640 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8641 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8642 continue;
8643 }
8644#endif
8645
8646 case kIemTbDbgEntryType_Label:
8647 {
8648 const char *pszName = "what_the_fudge";
8649 const char *pszComment = "";
8650 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8651 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8652 {
8653 case kIemNativeLabelType_Return: pszName = "Return"; break;
8654 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8655 case kIemNativeLabelType_ReturnBreakFF: pszName = "ReturnBreakFF"; break;
8656 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8657 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8658 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8659 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8660 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8661 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8662 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8663 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8664 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8665 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8666 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8667 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8668 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8669 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8670 case kIemNativeLabelType_If:
8671 pszName = "If";
8672 fNumbered = true;
8673 break;
8674 case kIemNativeLabelType_Else:
8675 pszName = "Else";
8676 fNumbered = true;
8677 pszComment = " ; regs state restored pre-if-block";
8678 break;
8679 case kIemNativeLabelType_Endif:
8680 pszName = "Endif";
8681 fNumbered = true;
8682 break;
8683 case kIemNativeLabelType_CheckIrq:
8684 pszName = "CheckIrq_CheckVM";
8685 fNumbered = true;
8686 break;
8687 case kIemNativeLabelType_TlbLookup:
8688 pszName = "TlbLookup";
8689 fNumbered = true;
8690 break;
8691 case kIemNativeLabelType_TlbMiss:
8692 pszName = "TlbMiss";
8693 fNumbered = true;
8694 break;
8695 case kIemNativeLabelType_TlbDone:
8696 pszName = "TlbDone";
8697 fNumbered = true;
8698 break;
8699 case kIemNativeLabelType_Invalid:
8700 case kIemNativeLabelType_End:
8701 break;
8702 }
8703 if (fNumbered)
8704 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8705 else
8706 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8707 continue;
8708 }
8709
8710 case kIemTbDbgEntryType_NativeOffset:
8711 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8712 Assert(offDbgNativeNext >= offNative);
8713 break;
8714
8715#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8716 case kIemTbDbgEntryType_DelayedPcUpdate:
8717 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8718 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8719 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8720 continue;
8721#endif
8722
8723#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8724 case kIemTbDbgEntryType_GuestRegDirty:
8725 {
8726 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8727 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
8728 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
8729 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
8730 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
8731 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
8732 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
8733 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
8734 pszGstReg, pszHstReg);
8735 continue;
8736 }
8737
8738 case kIemTbDbgEntryType_GuestRegWriteback:
8739 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
8740 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
8741 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
8742 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
8743 continue;
8744#endif
8745
8746 default:
8747 AssertFailed();
8748 }
8749 iDbgEntry++;
8750 break;
8751 }
8752 }
8753
8754 /*
8755 * Disassemble the next native instruction.
8756 */
8757 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8758# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8759 uint32_t cbInstr = sizeof(paNative[0]);
8760 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8761 if (RT_SUCCESS(rc))
8762 {
8763# if defined(RT_ARCH_AMD64)
8764 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8765 {
8766 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8767 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8768 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8769 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8770 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8771 uInfo & 0x8000 ? "recompiled" : "todo");
8772 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8773 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8774 else
8775 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8776 }
8777 else
8778# endif
8779 {
8780 const char *pszAnnotation = NULL;
8781# ifdef RT_ARCH_AMD64
8782 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8783 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8784 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8785 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8786 PCDISOPPARAM pMemOp;
8787 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8788 pMemOp = &Dis.Param1;
8789 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8790 pMemOp = &Dis.Param2;
8791 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8792 pMemOp = &Dis.Param3;
8793 else
8794 pMemOp = NULL;
8795 if ( pMemOp
8796 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8797 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8798 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8799 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8800
8801# elif defined(RT_ARCH_ARM64)
8802 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8803 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8804 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8805# else
8806# error "Port me"
8807# endif
8808 if (pszAnnotation)
8809 {
8810 static unsigned const s_offAnnotation = 55;
8811 size_t const cchAnnotation = strlen(pszAnnotation);
8812 size_t cchDis = strlen(szDisBuf);
8813 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8814 {
8815 if (cchDis < s_offAnnotation)
8816 {
8817 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8818 cchDis = s_offAnnotation;
8819 }
8820 szDisBuf[cchDis++] = ' ';
8821 szDisBuf[cchDis++] = ';';
8822 szDisBuf[cchDis++] = ' ';
8823 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8824 }
8825 }
8826 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8827 }
8828 }
8829 else
8830 {
8831# if defined(RT_ARCH_AMD64)
8832 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8833 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8834# elif defined(RT_ARCH_ARM64)
8835 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8836# else
8837# error "Port me"
8838# endif
8839 cbInstr = sizeof(paNative[0]);
8840 }
8841 offNative += cbInstr / sizeof(paNative[0]);
8842
8843# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8844 cs_insn *pInstr;
8845 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8846 (uintptr_t)pNativeCur, 1, &pInstr);
8847 if (cInstrs > 0)
8848 {
8849 Assert(cInstrs == 1);
8850 const char *pszAnnotation = NULL;
8851# if defined(RT_ARCH_ARM64)
8852 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8853 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8854 {
8855 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8856 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8857 char *psz = strchr(pInstr->op_str, '[');
8858 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8859 {
8860 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8861 int32_t off = -1;
8862 psz += 4;
8863 if (*psz == ']')
8864 off = 0;
8865 else if (*psz == ',')
8866 {
8867 psz = RTStrStripL(psz + 1);
8868 if (*psz == '#')
8869 off = RTStrToInt32(&psz[1]);
8870 /** @todo deal with index registers and LSL as well... */
8871 }
8872 if (off >= 0)
8873 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8874 }
8875 }
8876# endif
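 /* Example of what the op_str parsing above is intended to catch; the instructions
    and offsets here are made up for illustration only:
        "ldr w9, [x28, #0x100]" -> offVCpu + off = 0x100 -> annotated via iemNativeDbgVCpuOffsetToName
        "ldr x9, [x27, #0x18]"  -> RT_UOFFSETOF(VMCPU, cpum.GstCtx) + 0x18 (x27 = IEMNATIVE_REG_FIXED_PCPUMCTX)
    Index registers and shifted offsets are not handled yet, see the todo above. */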
8877
8878 size_t const cchOp = strlen(pInstr->op_str);
8879# if defined(RT_ARCH_AMD64)
8880 if (pszAnnotation)
8881 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8882 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8883 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8884 else
8885 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8886 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8887
8888# else
8889 if (pszAnnotation)
8890 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8891 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8892 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8893 else
8894 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8895 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8896# endif
8897 offNative += pInstr->size / sizeof(*pNativeCur);
8898 cs_free(pInstr, cInstrs);
8899 }
8900 else
8901 {
8902# if defined(RT_ARCH_AMD64)
8903 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8904 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8905# else
8906 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8907# endif
8908 offNative++;
8909 }
8910# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8911 }
8912 }
8913 else
8914#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8915 {
8916 /*
8917 * No debug info, just disassemble the x86 code and then the native code.
8918 *
8919 * First the guest code:
8920 */
8921 for (unsigned i = 0; i < pTb->cRanges; i++)
8922 {
8923 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8924 + (pTb->aRanges[i].idxPhysPage == 0
8925 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8926 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8927 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8928 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8929 unsigned off = pTb->aRanges[i].offOpcodes;
8930 /** @todo this ain't working when crossing pages! */
8931 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8932 while (off < cbOpcodes)
8933 {
8934 uint32_t cbInstr = 1;
8935 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8936 &pTb->pabOpcodes[off], cbOpcodes - off,
8937 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8938 if (RT_SUCCESS(rc))
8939 {
8940 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8941 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8942 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8943 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8944 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8945 GCPhysPc += cbInstr;
8946 off += cbInstr;
8947 }
8948 else
8949 {
8950 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8951 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8952 break;
8953 }
8954 }
8955 }
8956
8957 /*
8958 * Then the native code:
8959 */
8960 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8961 while (offNative < cNative)
8962 {
8963 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8964# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8965 uint32_t cbInstr = sizeof(paNative[0]);
8966 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8967 if (RT_SUCCESS(rc))
8968 {
8969# if defined(RT_ARCH_AMD64)
8970 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8971 {
8972 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8973 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8974 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8975 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8976 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8977 uInfo & 0x8000 ? "recompiled" : "todo");
8978 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8979 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8980 else
8981 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8982 }
8983 else
8984# endif
8985 {
8986# ifdef RT_ARCH_AMD64
8987 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8988 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8989 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8990 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8991# elif defined(RT_ARCH_ARM64)
8992 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8993 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8994 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8995# else
8996# error "Port me"
8997# endif
8998 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8999 }
9000 }
9001 else
9002 {
9003# if defined(RT_ARCH_AMD64)
9004 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9005 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9006# else
9007 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9008# endif
9009 cbInstr = sizeof(paNative[0]);
9010 }
9011 offNative += cbInstr / sizeof(paNative[0]);
9012
9013# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9014 cs_insn *pInstr;
9015 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9016 (uintptr_t)pNativeCur, 1, &pInstr);
9017 if (cInstrs > 0)
9018 {
9019 Assert(cInstrs == 1);
9020# if defined(RT_ARCH_AMD64)
9021 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9022 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9023# else
9024 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9025 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9026# endif
9027 offNative += pInstr->size / sizeof(*pNativeCur);
9028 cs_free(pInstr, cInstrs);
9029 }
9030 else
9031 {
9032# if defined(RT_ARCH_AMD64)
9033 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9034 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9035# else
9036 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9037# endif
9038 offNative++;
9039 }
9040# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9041 }
9042 }
9043
9044#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9045 /* Cleanup. */
9046 cs_close(&hDisasm);
9047#endif
9048}
9049
9050
9051/**
9052 * Recompiles the given threaded TB into a native one.
9053 *
9054 * In case of failure the translation block will be returned as-is.
9055 *
9056 * @returns pTb.
9057 * @param pVCpu The cross context virtual CPU structure of the calling
9058 * thread.
9059 * @param pTb The threaded translation to recompile to native.
9060 */
9061DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9062{
9063#if 0 /* For profiling the native recompiler code. */
9064l_profile_again:
9065#endif
9066 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9067
9068 /*
9069 * The first time thru, we allocate the recompiler state; the other times
9070 * we just need to reset it before using it again.
9071 */
9072 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9073 if (RT_LIKELY(pReNative))
9074 iemNativeReInit(pReNative, pTb);
9075 else
9076 {
9077 pReNative = iemNativeInit(pVCpu, pTb);
9078 AssertReturn(pReNative, pTb);
9079 }
9080
9081#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9082 /*
9083 * First do liveness analysis. This is done backwards.
9084 */
9085 {
9086 uint32_t idxCall = pTb->Thrd.cCalls;
9087 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9088 { /* likely */ }
9089 else
9090 {
9091 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9092 while (idxCall > cAlloc)
9093 cAlloc *= 2;
9094 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9095 AssertReturn(pvNew, pTb);
9096 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9097 pReNative->cLivenessEntriesAlloc = cAlloc;
9098 }
9099 AssertReturn(idxCall > 0, pTb);
9100 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9101
9102 /* The initial (final) entry. */
9103 idxCall--;
9104 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9105
9106 /* Loop backwards thru the calls and fill in the other entries. */
9107 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9108 while (idxCall > 0)
9109 {
9110 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9111 if (pfnLiveness)
9112 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9113 else
9114 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9115 pCallEntry--;
9116 idxCall--;
9117 }
9118
9119# ifdef VBOX_WITH_STATISTICS
9120 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9121 to 'clobbered' rather than 'input'. */
9122 /** @todo */
9123# endif
9124 }
9125#endif
9126
9127 /*
9128 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9129 * for aborting if an error happens.
9130 */
9131 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9132#ifdef LOG_ENABLED
9133 uint32_t const cCallsOrg = cCallsLeft;
9134#endif
9135 uint32_t off = 0;
9136 int rc = VINF_SUCCESS;
9137 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9138 {
9139#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
9140 /*
9141 * Emit prolog code (fixed).
9142 */
9143 off = iemNativeEmitProlog(pReNative, off);
9144#endif
9145
9146 /*
9147 * Convert the calls to native code.
9148 */
9149#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9150 int32_t iGstInstr = -1;
9151#endif
9152#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9153 uint32_t cThreadedCalls = 0;
9154 uint32_t cRecompiledCalls = 0;
9155#endif
9156#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9157 uint32_t idxCurCall = 0;
9158#endif
9159 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9160 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9161 while (cCallsLeft-- > 0)
9162 {
9163 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9164#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9165 pReNative->idxCurCall = idxCurCall;
9166#endif
9167
9168 /*
9169 * Debug info, assembly markup and statistics.
9170 */
9171#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9172 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9173 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9174#endif
9175#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9176 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9177 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9178 {
9179 if (iGstInstr < (int32_t)pTb->cInstructions)
9180 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9181 else
9182 Assert(iGstInstr == pTb->cInstructions);
9183 iGstInstr = pCallEntry->idxInstr;
9184 }
9185 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9186#endif
9187#if defined(VBOX_STRICT)
9188 off = iemNativeEmitMarker(pReNative, off,
9189 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9190#endif
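 /* The marker NOP emitted above wraps a 32-bit info word which the TB disassembler
    decodes again later; going by the encode/decode expressions, the layout is (illustrative):
        uInfo = RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction);
        // RT_HIWORD(uInfo) = threaded function, uInfo & 0x7fff = call index, bit 15 = recompiled.
  */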
9191#if defined(VBOX_STRICT)
9192 iemNativeRegAssertSanity(pReNative);
9193#endif
9194#ifdef VBOX_WITH_STATISTICS
9195 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9196#endif
9197
9198 /*
9199 * Actual work.
9200 */
9201 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9202 pfnRecom ? "(recompiled)" : "(todo)"));
9203 if (pfnRecom) /** @todo stats on this. */
9204 {
9205 off = pfnRecom(pReNative, off, pCallEntry);
9206 STAM_REL_STATS({cRecompiledCalls++;});
9207 }
9208 else
9209 {
9210 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9211 STAM_REL_STATS({cThreadedCalls++;});
9212 }
9213 Assert(off <= pReNative->cInstrBufAlloc);
9214 Assert(pReNative->cCondDepth == 0);
9215
9216#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9217 if (LogIs2Enabled())
9218 {
9219 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9220# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9221 static const char s_achState[] = "CUXI";
9222# else
9223 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9224# endif
9225
9226 char szGpr[17];
9227 for (unsigned i = 0; i < 16; i++)
9228 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9229 szGpr[16] = '\0';
9230
9231 char szSegBase[X86_SREG_COUNT + 1];
9232 char szSegLimit[X86_SREG_COUNT + 1];
9233 char szSegAttrib[X86_SREG_COUNT + 1];
9234 char szSegSel[X86_SREG_COUNT + 1];
9235 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9236 {
9237 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9238 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9239 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9240 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9241 }
9242 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9243 = szSegSel[X86_SREG_COUNT] = '\0';
9244
9245 char szEFlags[8];
9246 for (unsigned i = 0; i < 7; i++)
9247 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9248 szEFlags[7] = '\0';
9249
9250 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9251 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9252 }
9253#endif
9254
9255 /*
9256 * Advance.
9257 */
9258 pCallEntry++;
9259#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9260 idxCurCall++;
9261#endif
9262 }
9263
9264 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9265 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9266 if (!cThreadedCalls)
9267 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9268
9269#ifdef VBOX_WITH_STATISTICS
9270 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
9271#endif
9272
9273 /*
9274 * Emit the epilog code.
9275 */
9276 uint32_t idxReturnLabel;
9277 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9278
9279 /*
9280 * Generate special jump labels.
9281 */
9282 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9283 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9284 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
9285 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
9286 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9287 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9288
9289 /*
9290 * Generate simple TB tail labels that just call a helper with a pVCpu
9291 * arg and either return or longjmp/throw a non-zero status.
9292 *
9293 * The array entries must be ordered by enmLabel value so we can index
9294 * into it using the fTailLabels bit numbers.
9295 */
9296 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9297 static struct
9298 {
9299 IEMNATIVELABELTYPE enmLabel;
9300 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9301 } const g_aSimpleTailLabels[] =
9302 {
9303 { kIemNativeLabelType_Invalid, NULL },
9304 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9305 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9306 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9307 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9308 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9309 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9310 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9311 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9312 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9313 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9314 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9315 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9316 };
9317
9318 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9319 AssertCompile(kIemNativeLabelType_Invalid == 0);
9320 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
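 /* Worked example of the mask above: given the 13-entry g_aSimpleTailLabels table,
    the AssertCompile pins kIemNativeLabelType_LastSimple at 12, so
        RT_BIT_64(12 + 1) - 2 = 0x2000 - 2 = 0x1ffe
    i.e. bits 1 thru LastSimple are kept while bit 0 (kIemNativeLabelType_Invalid) is dropped. */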
9321 if (fTailLabels)
9322 {
9323 do
9324 {
9325 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9326 fTailLabels &= ~RT_BIT_64(enmLabel);
9327 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9328
9329 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9330 Assert(idxLabel != UINT32_MAX);
9331 if (idxLabel != UINT32_MAX)
9332 {
9333 iemNativeLabelDefine(pReNative, idxLabel, off);
9334
9335 /* int pfnCallback(PVMCPUCC pVCpu) */
9336 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9337 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9338
9339 /* jump back to the return sequence. */
9340 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9341 }
9342
9343 } while (fTailLabels);
9344 }
9345 }
9346 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9347 {
9348 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9349 return pTb;
9350 }
9351 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9352 Assert(off <= pReNative->cInstrBufAlloc);
9353
9354 /*
9355 * Make sure all labels have been defined.
9356 */
9357 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9358#ifdef VBOX_STRICT
9359 uint32_t const cLabels = pReNative->cLabels;
9360 for (uint32_t i = 0; i < cLabels; i++)
9361 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9362#endif
9363
9364#if 0 /* For profiling the native recompiler code. */
9365 if (pTb->Thrd.cCalls >= 136)
9366 {
9367 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9368 goto l_profile_again;
9369 }
9370#endif
9371
9372 /*
9373 * Allocate executable memory, copy over the code we've generated.
9374 */
9375 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9376 if (pTbAllocator->pDelayedFreeHead)
9377 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9378
9379 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
9380 AssertReturn(paFinalInstrBuf, pTb);
9381 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9382
9383 /*
9384 * Apply fixups.
9385 */
9386 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9387 uint32_t const cFixups = pReNative->cFixups;
9388 for (uint32_t i = 0; i < cFixups; i++)
9389 {
9390 Assert(paFixups[i].off < off);
9391 Assert(paFixups[i].idxLabel < cLabels);
9392 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9393 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9394 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9395 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9396 switch (paFixups[i].enmType)
9397 {
9398#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9399 case kIemNativeFixupType_Rel32:
9400 Assert(paFixups[i].off + 4 <= off);
9401 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9402 continue;
9403
9404#elif defined(RT_ARCH_ARM64)
9405 case kIemNativeFixupType_RelImm26At0:
9406 {
9407 Assert(paFixups[i].off < off);
9408 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9409 Assert(offDisp >= -262144 && offDisp < 262144);
9410 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9411 continue;
9412 }
9413
9414 case kIemNativeFixupType_RelImm19At5:
9415 {
9416 Assert(paFixups[i].off < off);
9417 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9418 Assert(offDisp >= -262144 && offDisp < 262144);
9419 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9420 continue;
9421 }
9422
9423 case kIemNativeFixupType_RelImm14At5:
9424 {
9425 Assert(paFixups[i].off < off);
9426 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9427 Assert(offDisp >= -8192 && offDisp < 8192);
9428 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9429 continue;
9430 }
9431
9432#endif
9433 case kIemNativeFixupType_Invalid:
9434 case kIemNativeFixupType_End:
9435 break;
9436 }
9437 AssertFailed();
9438 }
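 /* Illustrative fixup example (all values made up): an AMD64 Rel32 fixup recorded at
    buffer offset 0x100 against a label defined at offset 0x180 with an offAddend of -4
    would be resolved above as
        *Ptr.pi32 = 0x180 - 0x100 + (-4) = 0x7c
    while on ARM64 the same offLabel - offFixup + offAddend result is masked into the
    imm26/imm19/imm14 branch instruction fields instead. */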
9439
9440 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9441 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9442
9443 /*
9444 * Convert the translation block.
9445 */
9446 RTMemFree(pTb->Thrd.paCalls);
9447 pTb->Native.paInstructions = paFinalInstrBuf;
9448 pTb->Native.cInstructions = off;
9449 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9450#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9451 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9452 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9453#endif
9454
9455 Assert(pTbAllocator->cThreadedTbs > 0);
9456 pTbAllocator->cThreadedTbs -= 1;
9457 pTbAllocator->cNativeTbs += 1;
9458 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9459
9460#ifdef LOG_ENABLED
9461 /*
9462 * Disassemble to the log if enabled.
9463 */
9464 if (LogIs3Enabled())
9465 {
9466 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9467 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9468# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9469 RTLogFlush(NULL);
9470# endif
9471 }
9472#endif
9473 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9474
9475 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9476 return pTb;
9477}
9478