VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@104361

Last change on this file since 104361 was 104361, checked in by vboxsync, 10 months ago

VMM/IEM: Moved the native TB exit statistics on g_aSimpleTailLabels to the helper functions and made them release stats. Eliminate two duplicate stats. bugref:10653

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 412.1 KB
Line 
1/* $Id: IEMAllN8veRecompiler.cpp 104361 2024-04-18 14:34:59Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
70# include "/opt/local/include/capstone/capstone.h"
71#endif
72
73#include "IEMInline.h"
74#include "IEMThreadedFunctions.h"
75#include "IEMN8veRecompiler.h"
76#include "IEMN8veRecompilerEmit.h"
77#include "IEMN8veRecompilerTlbLookup.h"
78#include "IEMNativeFunctions.h"
79
80
81/*
82 * Narrow down configs here to avoid wasting time on unused configs.
83 * Note! Same checks in IEMAllThrdRecompiler.cpp.
84 */
85
86#ifndef IEM_WITH_CODE_TLB
87# error The code TLB must be enabled for the recompiler.
88#endif
89
90#ifndef IEM_WITH_DATA_TLB
91# error The data TLB must be enabled for the recompiler.
92#endif
93
94#ifndef IEM_WITH_SETJMP
95# error The setjmp approach must be enabled for the recompiler.
96#endif
97
98/** @todo eliminate this clang build hack. */
99#if RT_CLANG_PREREQ(4, 0)
100# pragma GCC diagnostic ignored "-Wunused-function"
101#endif
102
103
104/*********************************************************************************************************************************
105* Internal Functions *
106*********************************************************************************************************************************/
107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
108static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
109#endif
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
112 IEMNATIVEGSTREG enmGstReg, uint32_t off);
113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
130
131
132/**
133 * Used by TB code when it wants to raise a \#DE.
134 */
135IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
136{
137 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
138 iemRaiseDivideErrorJmp(pVCpu);
139#ifndef _MSC_VER
140 return VINF_IEM_RAISED_XCPT; /* not reached */
141#endif
142}
143
144
145/**
146 * Used by TB code when it wants to raise a \#UD.
147 */
148IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
149{
150 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
151 iemRaiseUndefinedOpcodeJmp(pVCpu);
152#ifndef _MSC_VER
153 return VINF_IEM_RAISED_XCPT; /* not reached */
154#endif
155}
156
157
158/**
159 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
160 *
161 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
162 */
163IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
164{
165 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
166 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
167 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
168 iemRaiseUndefinedOpcodeJmp(pVCpu);
169 else
170 iemRaiseDeviceNotAvailableJmp(pVCpu);
171#ifndef _MSC_VER
172 return VINF_IEM_RAISED_XCPT; /* not reached */
173#endif
174}
175
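/*
 * Illustrative sketch: this helper is only reached once the recompiled
 * IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT check has decided an exception is due,
 * so it merely re-derives which one: CR0.EM set or CR4.OSFXSR clear selects
 * \#UD, while the \#NM branch corresponds to the CR0.TS case that macro also
 * covers.  A standalone model of the decision, using hypothetical names and
 * only standard C headers (not the IEM API):
 *
 * @code
 *  #include <stdbool.h>
 *  #include <stdint.h>
 *
 *  #define SKETCH_CR0_EM     UINT64_C(0x00000004)  // CR0 bit 2
 *  #define SKETCH_CR4_OSFXSR UINT64_C(0x00000200)  // CR4 bit 9
 *
 *  // Returns true when the pending SSE-related fault is #UD, false for #NM.
 *  static bool sketchSseFaultIsUd(uint64_t cr0, uint64_t cr4)
 *  {
 *      return (cr0 & SKETCH_CR0_EM) != 0
 *          || (cr4 & SKETCH_CR4_OSFXSR) == 0;
 *  }
 * @endcode
 */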
176
177/**
178 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
179 *
180 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
181 */
182IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
183{
184 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
185 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
186 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
187 iemRaiseUndefinedOpcodeJmp(pVCpu);
188 else
189 iemRaiseDeviceNotAvailableJmp(pVCpu);
190#ifndef _MSC_VER
191 return VINF_IEM_RAISED_XCPT; /* not reached */
192#endif
193}
194
195
196/**
197 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
198 *
199 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
200 */
201IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
202{
203 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
204 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
205 iemRaiseSimdFpExceptionJmp(pVCpu);
206 else
207 iemRaiseUndefinedOpcodeJmp(pVCpu);
208#ifndef _MSC_VER
209 return VINF_IEM_RAISED_XCPT; /* not reached */
210#endif
211}
212
213
214/**
215 * Used by TB code when it wants to raise a \#NM.
216 */
217IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
218{
219 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
220 iemRaiseDeviceNotAvailableJmp(pVCpu);
221#ifndef _MSC_VER
222 return VINF_IEM_RAISED_XCPT; /* not reached */
223#endif
224}
225
226
227/**
228 * Used by TB code when it wants to raise a \#GP(0).
229 */
230IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
231{
232 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
233 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
234#ifndef _MSC_VER
235 return VINF_IEM_RAISED_XCPT; /* not reached */
236#endif
237}
238
239
240/**
241 * Used by TB code when it wants to raise a \#MF.
242 */
243IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
244{
245 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
246 iemRaiseMathFaultJmp(pVCpu);
247#ifndef _MSC_VER
248 return VINF_IEM_RAISED_XCPT; /* not reached */
249#endif
250}
251
252
253/**
254 * Used by TB code when it wants to raise a \#XF.
255 */
256IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
257{
258 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
259 iemRaiseSimdFpExceptionJmp(pVCpu);
260#ifndef _MSC_VER
261 return VINF_IEM_RAISED_XCPT; /* not reached */
262#endif
263}
264
265
266/**
267 * Used by TB code when detecting opcode changes.
268 * @see iemThreadedFuncWorkerObsoleteTb
269 */
270IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
271{
272 /* We set fSafeToFree to false because we're being called in the context
273 of a TB callback function, which for native TBs means we cannot release
274 the executable memory until we've returned our way back to iemTbExec, as
275 that return path goes via the native code generated for the TB. */
276 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
277 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
278 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
279 return VINF_IEM_REEXEC_BREAK;
280}
281
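/*
 * Illustrative sketch of the deferred-free pattern behind the fSafeToFree=false
 * argument above: code that is still being executed is only queued for disposal
 * and is actually freed after control has returned out of it.  All names below
 * are hypothetical, not the IEM/TB allocator API:
 *
 * @code
 *  #include <stdbool.h>
 *  #include <stdlib.h>
 *
 *  typedef struct SKETCHTB
 *  {
 *      void *pvNativeCode;   // executable buffer for the translation block
 *      bool  fPendingFree;   // set when the block became obsolete mid-execution
 *  } SKETCHTB;
 *
 *  static void sketchTbObsolete(SKETCHTB *pTb, bool fSafeToFree)
 *  {
 *      if (fSafeToFree)
 *      {
 *          free(pTb->pvNativeCode);   // nobody can be executing it any more
 *          pTb->pvNativeCode = NULL;
 *      }
 *      else
 *          pTb->fPendingFree = true;  // free it later, once we've returned out of the TB
 *  }
 * @endcode
 */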
282
283/**
284 * Used by TB code when we need to switch to a TB with CS.LIM checking.
285 */
286IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
287{
288 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
289 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
290 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
291 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
292 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
293 return VINF_IEM_REEXEC_BREAK;
294}
295
296
297/**
298 * Used by TB code when we missed a PC check after a branch.
299 */
300IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
301{
302 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
303 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
304 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
305 pVCpu->iem.s.pbInstrBuf));
306 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
307 return VINF_IEM_REEXEC_BREAK;
308}
309
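/*
 * Illustrative sketch of how the GCPhysWithOffset value logged above relates to
 * the instruction-buffer bookkeeping: the flat address of RIP (CS.BASE + RIP)
 * minus the flat address the buffer was mapped at (uInstrBufPc), added to the
 * buffer's guest-physical address (GCPhysInstrBuf).  Hypothetical helper name,
 * plain integer arithmetic only:
 *
 * @code
 *  #include <stdint.h>
 *
 *  static uint64_t sketchGCPhysOfRip(uint64_t GCPhysInstrBuf, uint64_t uInstrBufPc,
 *                                    uint64_t u64CsBase, uint64_t rip)
 *  {
 *      return GCPhysInstrBuf + (u64CsBase + rip) - uInstrBufPc;
 *  }
 * @endcode
 */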
310
311
312/*********************************************************************************************************************************
313* Helpers: Segmented memory fetches and stores. *
314*********************************************************************************************************************************/
315
316/**
317 * Used by TB code to load unsigned 8-bit data w/ segmentation.
318 */
319IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
320{
321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
322 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
323#else
324 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
325#endif
326}
327
328
329/**
330 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
331 * to 16 bits.
332 */
333IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
334{
335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
336 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
337#else
338 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
339#endif
340}
341
342
343/**
344 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
345 * to 32 bits.
346 */
347IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
348{
349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
350 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
351#else
352 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
353#endif
354}
355
356/**
357 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
358 * to 64 bits.
359 */
360IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
361{
362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
363 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
364#else
365 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
366#endif
367}
368
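/*
 * Illustrative sketch of the cast chains used by the three sign-extending fetch
 * helpers above: the byte is first sign-extended to the requested width and the
 * result is then zero-extended to 64 bits for the native caller.  A standalone
 * demonstration assuming nothing beyond the C standard library:
 *
 * @code
 *  #include <inttypes.h>
 *  #include <stdint.h>
 *  #include <stdio.h>
 *
 *  int main(void)
 *  {
 *      uint8_t const bVal = 0x80;  // -128 when interpreted as signed
 *      printf("%#" PRIx64 "\n", (uint64_t)(uint16_t)(int16_t)(int8_t)bVal); // 0xff80
 *      printf("%#" PRIx64 "\n", (uint64_t)(uint32_t)(int32_t)(int8_t)bVal); // 0xffffff80
 *      printf("%#" PRIx64 "\n", (uint64_t)(int64_t)(int8_t)bVal);           // 0xffffffffffffff80
 *      return 0;
 *  }
 * @endcode
 */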
369
370/**
371 * Used by TB code to load unsigned 16-bit data w/ segmentation.
372 */
373IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
374{
375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
376 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
377#else
378 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
379#endif
380}
381
382
383/**
384 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
385 * to 32 bits.
386 */
387IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
388{
389#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
390 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
391#else
392 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
393#endif
394}
395
396
397/**
398 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
399 * to 64 bits.
400 */
401IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
402{
403#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
404 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
405#else
406 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
407#endif
408}
409
410
411/**
412 * Used by TB code to load unsigned 32-bit data w/ segmentation.
413 */
414IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
415{
416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
417 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
418#else
419 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
420#endif
421}
422
423
424/**
425 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
426 * to 64 bits.
427 */
428IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
429{
430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
431 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
432#else
433 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
434#endif
435}
436
437
438/**
439 * Used by TB code to load unsigned 64-bit data w/ segmentation.
440 */
441IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
442{
443#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
444 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
445#else
446 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
447#endif
448}
449
450
451#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
452/**
453 * Used by TB code to load 128-bit data w/ segmentation.
454 */
455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
456{
457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
458 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
459#else
460 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
461#endif
462}
463
464
465/**
466 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
467 */
468IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
469{
470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
471 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
472#else
473 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
474#endif
475}
476
477
478/**
479 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
480 */
481IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
482{
483#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
484 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
485#else
486 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
487#endif
488}
489
490
491/**
492 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
493 */
494IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
495{
496#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
497 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
498#else
499 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
500#endif
501}
502
503
504/**
505 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
506 */
507IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
508{
509#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
510 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
511#else
512 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
513#endif
514}
515#endif
516
517
518/**
519 * Used by TB code to store unsigned 8-bit data w/ segmentation.
520 */
521IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
522{
523#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
524 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
525#else
526 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
527#endif
528}
529
530
531/**
532 * Used by TB code to store unsigned 16-bit data w/ segmentation.
533 */
534IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
535{
536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
537 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
538#else
539 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
540#endif
541}
542
543
544/**
545 * Used by TB code to store unsigned 32-bit data w/ segmentation.
546 */
547IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
548{
549#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
550 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
551#else
552 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
553#endif
554}
555
556
557/**
558 * Used by TB code to store unsigned 64-bit data w/ segmentation.
559 */
560IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
561{
562#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
563 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
564#else
565 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
566#endif
567}
568
569
570#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
571/**
572 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
573 */
574IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
575{
576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
577 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
578#else
579 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
580#endif
581}
582
583
584/**
585 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
586 */
587IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
588{
589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
590 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
591#else
592 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
593#endif
594}
595
596
597/**
598 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checks.
599 */
600IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
601{
602#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
603 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
604#else
605 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
606#endif
607}
608
609
610/**
611 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
612 */
613IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
614{
615#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
616 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
617#else
618 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
619#endif
620}
621#endif
622
623
624
625/**
626 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
627 */
628IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
629{
630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
631 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
632#else
633 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
634#endif
635}
636
637
638/**
639 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
640 */
641IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
642{
643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
644 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
645#else
646 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
647#endif
648}
649
650
651/**
652 * Used by TB code to store a 32-bit selector value onto a generic stack.
653 *
654 * Intel CPUs don't write the whole dword, thus the special function.
655 */
656IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
657{
658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
659 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
660#else
661 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
662#endif
663}
664
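/*
 * Illustrative sketch of the quirk mentioned above: with a 32-bit operand size,
 * pushing a segment register on Intel CPUs only writes the low 16 bits of the
 * dword stack slot, leaving the upper half untouched.  A minimal model with a
 * hypothetical name, not the IEM store worker:
 *
 * @code
 *  #include <stdint.h>
 *  #include <string.h>
 *
 *  static void sketchStoreU32SReg(uint8_t *pbStackSlot, uint32_t uSel)
 *  {
 *      uint16_t const uSel16 = (uint16_t)uSel;
 *      memcpy(pbStackSlot, &uSel16, sizeof(uSel16));  // bytes 2..3 of the slot keep their old value
 *  }
 * @endcode
 */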
665
666/**
667 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
668 */
669IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
670{
671#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
672 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
673#else
674 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
675#endif
676}
677
678
679/**
680 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
681 */
682IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
683{
684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
685 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
686#else
687 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
688#endif
689}
690
691
692/**
693 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
694 */
695IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
696{
697#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
698 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
699#else
700 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
701#endif
702}
703
704
705/**
706 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
707 */
708IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
709{
710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
711 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
712#else
713 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
714#endif
715}
716
717
718
719/*********************************************************************************************************************************
720* Helpers: Flat memory fetches and stores. *
721*********************************************************************************************************************************/
722
723/**
724 * Used by TB code to load unsigned 8-bit data w/ flat address.
725 * @note Zero extending the value to 64-bit to simplify assembly.
726 */
727IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
728{
729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
730 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
731#else
732 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
733#endif
734}
735
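/*
 * Illustrative note and sketch: in the TLB-lookup configuration the flat helpers
 * in this section reuse the segmented Safe workers and pass UINT8_MAX as the
 * segment register index, which the workers take to mean "flat address, no
 * segment applied".  A hypothetical model of that convention, not the IEM code:
 *
 * @code
 *  #include <stdint.h>
 *
 *  #define SKETCH_SREG_NONE UINT8_MAX  // flat addressing, no segment base added
 *
 *  static uint64_t sketchEffectiveAddr(uint64_t const *pauSegBase, uint8_t iSegReg, uint64_t GCPtrMem)
 *  {
 *      return iSegReg == SKETCH_SREG_NONE ? GCPtrMem : pauSegBase[iSegReg] + GCPtrMem;
 *  }
 * @endcode
 */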
736
737/**
738 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
739 * to 16 bits.
740 * @note Zero extending the value to 64-bit to simplify assembly.
741 */
742IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
746#else
747 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
754 * to 32 bits.
755 * @note Zero extending the value to 64-bit to simplify assembly.
756 */
757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
758{
759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
760 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
761#else
762 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
763#endif
764}
765
766
767/**
768 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
769 * to 64 bits.
770 */
771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
772{
773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
774 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
775#else
776 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
777#endif
778}
779
780
781/**
782 * Used by TB code to load unsigned 16-bit data w/ flat address.
783 * @note Zero extending the value to 64-bit to simplify assembly.
784 */
785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
786{
787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
788 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
789#else
790 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
791#endif
792}
793
794
795/**
796 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
797 * to 32 bits.
798 * @note Zero extending the value to 64-bit to simplify assembly.
799 */
800IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
801{
802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
803 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
804#else
805 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
806#endif
807}
808
809
810/**
811 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
812 * to 64 bits.
813 * @note Zero extending the value to 64-bit to simplify assembly.
814 */
815IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
816{
817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
818 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
819#else
820 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
821#endif
822}
823
824
825/**
826 * Used by TB code to load unsigned 32-bit data w/ flat address.
827 * @note Zero extending the value to 64-bit to simplify assembly.
828 */
829IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
830{
831#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
832 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
833#else
834 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
835#endif
836}
837
838
839/**
840 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
841 * to 64 bits.
842 * @note Zero extending the value to 64-bit to simplify assembly.
843 */
844IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
845{
846#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
847 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
848#else
849 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
850#endif
851}
852
853
854/**
855 * Used by TB code to load unsigned 64-bit data w/ flat address.
856 */
857IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
858{
859#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
860 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
861#else
862 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
863#endif
864}
865
866
867#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
868/**
869 * Used by TB code to load unsigned 128-bit data w/ flat address.
870 */
871IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
872{
873#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
874 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
875#else
876 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
877#endif
878}
879
880
881/**
882 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
883 */
884IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
885{
886#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
887 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
888#else
889 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
890#endif
891}
892
893
894/**
895 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
896 */
897IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
898{
899#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
900 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
901#else
902 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
903#endif
904}
905
906
907/**
908 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
909 */
910IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
911{
912#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
913 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
914#else
915 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
916#endif
917}
918
919
920/**
921 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
922 */
923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
924{
925#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
926 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
927#else
928 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
929#endif
930}
931#endif
932
933
934/**
935 * Used by TB code to store unsigned 8-bit data w/ flat address.
936 */
937IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
938{
939#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
940 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
941#else
942 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
943#endif
944}
945
946
947/**
948 * Used by TB code to store unsigned 16-bit data w/ flat address.
949 */
950IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
951{
952#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
953 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
954#else
955 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
956#endif
957}
958
959
960/**
961 * Used by TB code to store unsigned 32-bit data w/ flat address.
962 */
963IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
964{
965#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
966 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
967#else
968 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
969#endif
970}
971
972
973/**
974 * Used by TB code to store unsigned 64-bit data w/ flat address.
975 */
976IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
977{
978#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
979 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
980#else
981 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
982#endif
983}
984
985
986#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
987/**
988 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
989 */
990IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
991{
992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
993 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
994#else
995 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
996#endif
997}
998
999
1000/**
1001 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
1002 */
1003IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1004{
1005#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1006 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1007#else
1008 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1009#endif
1010}
1011
1012
1013/**
1014 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
1015 */
1016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1017{
1018#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1019 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1020#else
1021 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1022#endif
1023}
1024
1025
1026/**
1027 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
1028 */
1029IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1030{
1031#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1032 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1033#else
1034 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1035#endif
1036}
1037#endif
1038
1039
1040
1041/**
1042 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1043 */
1044IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1045{
1046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1047 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1048#else
1049 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1050#endif
1051}
1052
1053
1054/**
1055 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1056 */
1057IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1058{
1059#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1060 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1061#else
1062 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1063#endif
1064}
1065
1066
1067/**
1068 * Used by TB code to store a segment selector value onto a flat stack.
1069 *
1070 * Intel CPUs don't write the whole dword, thus the special function.
1071 */
1072IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1073{
1074#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1075 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1076#else
1077 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1078#endif
1079}
1080
1081
1082/**
1083 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1084 */
1085IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1086{
1087#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1088 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1089#else
1090 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1091#endif
1092}
1093
1094
1095/**
1096 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1097 */
1098IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1099{
1100#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1101 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1102#else
1103 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1104#endif
1105}
1106
1107
1108/**
1109 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1110 */
1111IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1112{
1113#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1114 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1115#else
1116 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1117#endif
1118}
1119
1120
1121/**
1122 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1123 */
1124IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1125{
1126#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1127 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1128#else
1129 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1130#endif
1131}
1132
1133
1134
1135/*********************************************************************************************************************************
1136* Helpers: Segmented memory mapping. *
1137*********************************************************************************************************************************/
1138
1139/**
1140 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1141 * segmentation.
1142 */
1143IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1144 RTGCPTR GCPtrMem, uint8_t iSegReg))
1145{
1146#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1147 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1148#else
1149 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1150#endif
1151}
1152
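/*
 * Illustrative sketch of the map/unmap contract the pbUnmapInfo parameter serves
 * throughout this section: the map helper returns the host pointer and writes a
 * small cookie through pbUnmapInfo, which a later commit/unmap call consumes to
 * find and release the mapping.  All names below are hypothetical, not the IEM
 * mapping API:
 *
 * @code
 *  #include <stddef.h>
 *  #include <stdint.h>
 *
 *  typedef struct SKETCHMAPSLOT
 *  {
 *      void   *pvHost;      // host-side pointer handed back to the caller
 *      uint8_t bUnmapInfo;  // cookie identifying the slot (and access mode)
 *  } SKETCHMAPSLOT;
 *
 *  static void *sketchMap(SKETCHMAPSLOT *pSlot, void *pvBacking, uint8_t *pbUnmapInfo)
 *  {
 *      pSlot->pvHost     = pvBacking;
 *      pSlot->bUnmapInfo = 1;             // a real implementation encodes slot index + flags
 *      *pbUnmapInfo      = pSlot->bUnmapInfo;
 *      return pSlot->pvHost;
 *  }
 *
 *  static void sketchCommitAndUnmap(SKETCHMAPSLOT *pSlot, uint8_t bUnmapInfo)
 *  {
 *      if (pSlot->bUnmapInfo == bUnmapInfo)  // cookie matches: release the mapping
 *      {
 *          pSlot->pvHost     = NULL;
 *          pSlot->bUnmapInfo = 0;
 *      }
 *  }
 * @endcode
 */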
1153
1154/**
1155 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1156 */
1157IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1158 RTGCPTR GCPtrMem, uint8_t iSegReg))
1159{
1160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1161 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1162#else
1163 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1164#endif
1165}
1166
1167
1168/**
1169 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1170 */
1171IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1172 RTGCPTR GCPtrMem, uint8_t iSegReg))
1173{
1174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1175 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1176#else
1177 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1178#endif
1179}
1180
1181
1182/**
1183 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1184 */
1185IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1186 RTGCPTR GCPtrMem, uint8_t iSegReg))
1187{
1188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1189 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1190#else
1191 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1192#endif
1193}
1194
1195
1196/**
1197 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1198 * segmentation.
1199 */
1200IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1201 RTGCPTR GCPtrMem, uint8_t iSegReg))
1202{
1203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1204 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1205#else
1206 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1207#endif
1208}
1209
1210
1211/**
1212 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1213 */
1214IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1215 RTGCPTR GCPtrMem, uint8_t iSegReg))
1216{
1217#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1218 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1219#else
1220 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1221#endif
1222}
1223
1224
1225/**
1226 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1227 */
1228IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1229 RTGCPTR GCPtrMem, uint8_t iSegReg))
1230{
1231#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1232 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1233#else
1234 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1235#endif
1236}
1237
1238
1239/**
1240 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1241 */
1242IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1243 RTGCPTR GCPtrMem, uint8_t iSegReg))
1244{
1245#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1246 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1247#else
1248 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1249#endif
1250}
1251
1252
1253/**
1254 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1255 * segmentation.
1256 */
1257IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1258 RTGCPTR GCPtrMem, uint8_t iSegReg))
1259{
1260#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1261 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1262#else
1263 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1264#endif
1265}
1266
1267
1268/**
1269 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1270 */
1271IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1272 RTGCPTR GCPtrMem, uint8_t iSegReg))
1273{
1274#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1275 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1276#else
1277 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1278#endif
1279}
1280
1281
1282/**
1283 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1284 */
1285IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1286 RTGCPTR GCPtrMem, uint8_t iSegReg))
1287{
1288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1289 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1290#else
1291 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1292#endif
1293}
1294
1295
1296/**
1297 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1298 */
1299IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1300 RTGCPTR GCPtrMem, uint8_t iSegReg))
1301{
1302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1303 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1304#else
1305 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1306#endif
1307}
1308
1309
1310/**
1311 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1312 * segmentation.
1313 */
1314IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1315 RTGCPTR GCPtrMem, uint8_t iSegReg))
1316{
1317#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1318 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1319#else
1320 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1321#endif
1322}
1323
1324
1325/**
1326 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1327 */
1328IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1329 RTGCPTR GCPtrMem, uint8_t iSegReg))
1330{
1331#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1332 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1333#else
1334 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1335#endif
1336}
1337
1338
1339/**
1340 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1341 */
1342IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1343 RTGCPTR GCPtrMem, uint8_t iSegReg))
1344{
1345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1346 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1347#else
1348 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1349#endif
1350}
1351
1352
1353/**
1354 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1355 */
1356IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1357 RTGCPTR GCPtrMem, uint8_t iSegReg))
1358{
1359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1360 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1361#else
1362 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1363#endif
1364}
1365
1366
1367/**
1368 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1369 */
1370IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1371 RTGCPTR GCPtrMem, uint8_t iSegReg))
1372{
1373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1374 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1375#else
1376 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1377#endif
1378}
1379
1380
1381/**
1382 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1383 */
1384IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1385 RTGCPTR GCPtrMem, uint8_t iSegReg))
1386{
1387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1388 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1389#else
1390 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1391#endif
1392}
1393
1394
1395/**
1396 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1397 * segmentation.
1398 */
1399IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1400 RTGCPTR GCPtrMem, uint8_t iSegReg))
1401{
1402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1403 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1404#else
1405 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1406#endif
1407}
1408
1409
1410/**
1411 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1412 */
1413IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1414 RTGCPTR GCPtrMem, uint8_t iSegReg))
1415{
1416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1417 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1418#else
1419 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1420#endif
1421}
1422
1423
1424/**
1425 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1426 */
1427IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1428 RTGCPTR GCPtrMem, uint8_t iSegReg))
1429{
1430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1431 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1432#else
1433 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1434#endif
1435}
1436
1437
1438/**
1439 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1440 */
1441IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1442 RTGCPTR GCPtrMem, uint8_t iSegReg))
1443{
1444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1445 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1446#else
1447 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1448#endif
1449}
1450
1451
1452/*********************************************************************************************************************************
1453* Helpers: Flat memory mapping. *
1454*********************************************************************************************************************************/
1455
1456/**
1457 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1458 * address.
1459 */
1460IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1461{
1462#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1463 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1464#else
1465 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1466#endif
1467}
1468
1469
1470/**
1471 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1472 */
1473IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1474{
1475#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1476 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1477#else
1478 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1479#endif
1480}
1481
1482
1483/**
1484 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1485 */
1486IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1487{
1488#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1489 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1490#else
1491 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1492#endif
1493}
1494
1495
1496/**
1497 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1498 */
1499IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1500{
1501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1502 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1503#else
1504 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1505#endif
1506}
1507
1508
1509/**
1510 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1511 * address.
1512 */
1513IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1514{
1515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1516 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1517#else
1518 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1519#endif
1520}
1521
1522
1523/**
1524 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1525 */
1526IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1527{
1528#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1529 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1530#else
1531 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1532#endif
1533}
1534
1535
1536/**
1537 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1538 */
1539IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1540{
1541#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1542 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1543#else
1544 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1545#endif
1546}
1547
1548
1549/**
1550 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1551 */
1552IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1553{
1554#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1555 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1556#else
1557 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1558#endif
1559}
1560
1561
1562/**
1563 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1564 * address.
1565 */
1566IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1567{
1568#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1569 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1570#else
1571 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1572#endif
1573}
1574
1575
1576/**
1577 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1578 */
1579IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1580{
1581#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1582 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1583#else
1584 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1585#endif
1586}
1587
1588
1589/**
1590 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1591 */
1592IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1593{
1594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1595 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1596#else
1597 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1598#endif
1599}
1600
1601
1602/**
1603 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1604 */
1605IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1606{
1607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1608 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1609#else
1610 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1617 * address.
1618 */
1619IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1620{
1621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1622 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1623#else
1624 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1633{
1634#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1635 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1636#else
1637 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1638#endif
1639}
1640
1641
1642/**
1643 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1646{
1647#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1648 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1649#else
1650 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1651#endif
1652}
1653
1654
1655/**
1656 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1657 */
1658IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1659{
1660#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1661 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1662#else
1663 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1664#endif
1665}
1666
1667
1668/**
1669 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1670 */
1671IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1672{
1673#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1674 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1675#else
1676 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1677#endif
1678}
1679
1680
1681/**
1682 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1685{
1686#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1687 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1688#else
1689 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1690#endif
1691}
1692
1693
1694/**
1695 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1696 * address.
1697 */
1698IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1699{
1700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1701 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1702#else
1703 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1704#endif
1705}
1706
1707
1708/**
1709 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1710 */
1711IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1712{
1713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1714 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1715#else
1716 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1717#endif
1718}
1719
1720
1721/**
1722 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1723 */
1724IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1725{
1726#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1727 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1728#else
1729 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1730#endif
1731}
1732
1733
1734/**
1735 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1736 */
1737IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1738{
1739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1740 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1741#else
1742 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1743#endif
1744}
1745
1746
1747/*********************************************************************************************************************************
1748* Helpers: Commit, rollback & unmap *
1749*********************************************************************************************************************************/
1750
1751/**
1752 * Used by TB code to commit and unmap an atomic read-write memory mapping.
1753 */
1754IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1755{
1756 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
1757}
1758
1759
1760/**
1761 * Used by TB code to commit and unmap a read-write memory mapping.
1762 */
1763IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1764{
1765 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
1766}
1767
1768
1769/**
1770 * Used by TB code to commit and unmap a write-only memory mapping.
1771 */
1772IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1773{
1774 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
1775}
1776
1777
1778/**
1779 * Used by TB code to commit and unmap a read-only memory mapping.
1780 */
1781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1782{
1783 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
1784}
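/*
 * Note: a minimal usage sketch (not compiled, not part of the recompiler) of how a
 * flat map helper above pairs with the matching commit-and-unmap helper via the
 * unmap info byte.  The wrapper name and the increment are made up for illustration
 * and assume a valid pVCpu and a mapped flat guest address.
 */
#if 0
static void iemNativeExampleMapCommitPattern(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32Dst    = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
    *pu32Dst += 1;                                      /* access guest memory through the mapping */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo); /* must match the Rw mapping above */
}
#endif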
1785
1786
1787/**
1788 * Reinitializes the native recompiler state.
1789 *
1790 * Called before starting a new recompile job.
1791 */
1792static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1793{
1794 pReNative->cLabels = 0;
1795 pReNative->bmLabelTypes = 0;
1796 pReNative->cFixups = 0;
1797#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1798 pReNative->pDbgInfo->cEntries = 0;
1799 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
1800#endif
1801 pReNative->pTbOrg = pTb;
1802 pReNative->cCondDepth = 0;
1803 pReNative->uCondSeqNo = 0;
1804 pReNative->uCheckIrqSeqNo = 0;
1805 pReNative->uTlbSeqNo = 0;
1806
1807#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1808 pReNative->Core.offPc = 0;
1809 pReNative->Core.cInstrPcUpdateSkipped = 0;
1810#endif
1811#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1812 pReNative->fSimdRaiseXcptChecksEmitted = 0;
1813#endif
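    /* Mark the fixed host registers as allocated up front; when the host has fewer than
       32 GPRs, also mark the non-existent register indexes so they are never handed out. */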
1814 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1815#if IEMNATIVE_HST_GREG_COUNT < 32
1816 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1817#endif
1818 ;
1819 pReNative->Core.bmHstRegsWithGstShadow = 0;
1820 pReNative->Core.bmGstRegShadows = 0;
1821#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1822 pReNative->Core.bmGstRegShadowDirty = 0;
1823#endif
1824 pReNative->Core.bmVars = 0;
1825 pReNative->Core.bmStack = 0;
1826 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1827 pReNative->Core.u64ArgVars = UINT64_MAX;
1828
1829 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
1830 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1831 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1832 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1833 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1834 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1835 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1836 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
1837 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
1838 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
1839 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
1840 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
1841 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
1842 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
1843 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
1844 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
1845 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
1846 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
1847
1848 /* Full host register reinit: */
1849 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1850 {
1851 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1852 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1853 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1854 }
1855
1856 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1857 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1858#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1859 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1860#endif
1861#ifdef IEMNATIVE_REG_FIXED_TMP0
1862 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1863#endif
1864#ifdef IEMNATIVE_REG_FIXED_TMP1
1865 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
1866#endif
1867#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1868 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
1869#endif
1870 );
1871 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1872 {
1873 fRegs &= ~RT_BIT_32(idxReg);
1874 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1875 }
1876
1877 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1878#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1879 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1880#endif
1881#ifdef IEMNATIVE_REG_FIXED_TMP0
1882 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1883#endif
1884#ifdef IEMNATIVE_REG_FIXED_TMP1
1885 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
1886#endif
1887#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1888 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
1889#endif
1890
1891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1892 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
1893# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
1894 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
1895# endif
1896 ;
1897 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
1898 pReNative->Core.bmGstSimdRegShadows = 0;
1899 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
1900 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
1901
1902 /* Full host register reinit: */
1903 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
1904 {
1905 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
1906 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
1907 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
1908 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
1909 }
1910
1911 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
1912 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1913 {
1914 fRegs &= ~RT_BIT_32(idxReg);
1915 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1916 }
1917
1918#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
1919 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1920#endif
1921
1922#endif
1923
1924 return pReNative;
1925}
1926
1927
1928/**
1929 * Allocates and initializes the native recompiler state.
1930 *
1931 * This is called the first time an EMT wants to recompile something.
1932 *
1933 * @returns Pointer to the new recompiler state.
1934 * @param pVCpu The cross context virtual CPU structure of the calling
1935 * thread.
1936 * @param pTb The TB that's about to be recompiled.
1937 * @thread EMT(pVCpu)
1938 */
1939static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1940{
1941 VMCPU_ASSERT_EMT(pVCpu);
1942
1943 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1944 AssertReturn(pReNative, NULL);
1945
1946 /*
1947 * Try allocate all the buffers and stuff we need.
1948 */
1949 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1950 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1951 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1952#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1953 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1954#endif
1955 if (RT_LIKELY( pReNative->pInstrBuf
1956 && pReNative->paLabels
1957 && pReNative->paFixups)
1958#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1959 && pReNative->pDbgInfo
1960#endif
1961 )
1962 {
1963 /*
1964 * Set the buffer & array sizes on success.
1965 */
1966 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1967 pReNative->cLabelsAlloc = _8K;
1968 pReNative->cFixupsAlloc = _16K;
1969#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1970 pReNative->cDbgInfoAlloc = _16K;
1971#endif
1972
1973 /* Other constant stuff: */
1974 pReNative->pVCpu = pVCpu;
1975
1976 /*
1977 * Done, just need to save it and reinit it.
1978 */
1979 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1980 return iemNativeReInit(pReNative, pTb);
1981 }
1982
1983 /*
1984 * Failed. Cleanup and return.
1985 */
1986 AssertFailed();
1987 RTMemFree(pReNative->pInstrBuf);
1988 RTMemFree(pReNative->paLabels);
1989 RTMemFree(pReNative->paFixups);
1990#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1991 RTMemFree(pReNative->pDbgInfo);
1992#endif
1993 RTMemFree(pReNative);
1994 return NULL;
1995}
1996
1997
1998/**
1999 * Creates a label
2000 *
2001 * If the label does not yet have a defined position,
2002 * call iemNativeLabelDefine() later to set it.
2003 *
2004 * @returns Label ID. Throws VBox status code on failure, so no need to check
2005 * the return value.
2006 * @param pReNative The native recompile state.
2007 * @param enmType The label type.
2008 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2009 * label is not yet defined (default).
2010 * @param uData Data associated with the label. Only applicable to
2011 * certain type of labels. Default is zero.
2012 */
2013DECL_HIDDEN_THROW(uint32_t)
2014iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2015 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2016{
2017 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2018
2019 /*
2020 * Locate existing label definition.
2021 *
2022 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2023 * and uData is zero.
2024 */
2025 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2026 uint32_t const cLabels = pReNative->cLabels;
2027 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2028#ifndef VBOX_STRICT
2029 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2030 && offWhere == UINT32_MAX
2031 && uData == 0
2032#endif
2033 )
2034 {
2035#ifndef VBOX_STRICT
2036 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2037 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2038 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2039 if (idxLabel < pReNative->cLabels)
2040 return idxLabel;
2041#else
2042 for (uint32_t i = 0; i < cLabels; i++)
2043 if ( paLabels[i].enmType == enmType
2044 && paLabels[i].uData == uData)
2045 {
2046 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2047 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2048 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2049 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2050 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2051 return i;
2052 }
2053 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2054 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2055#endif
2056 }
2057
2058 /*
2059 * Make sure we've got room for another label.
2060 */
2061 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2062 { /* likely */ }
2063 else
2064 {
2065 uint32_t cNew = pReNative->cLabelsAlloc;
2066 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2067 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2068 cNew *= 2;
2069 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2070 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2071 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2072 pReNative->paLabels = paLabels;
2073 pReNative->cLabelsAlloc = cNew;
2074 }
2075
2076 /*
2077 * Define a new label.
2078 */
2079 paLabels[cLabels].off = offWhere;
2080 paLabels[cLabels].enmType = enmType;
2081 paLabels[cLabels].uData = uData;
2082 pReNative->cLabels = cLabels + 1;
2083
2084 Assert((unsigned)enmType < 64);
2085 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2086
2087 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2088 {
2089 Assert(uData == 0);
2090 pReNative->aidxUniqueLabels[enmType] = cLabels;
2091 }
2092
2093 if (offWhere != UINT32_MAX)
2094 {
2095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2096 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2097 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2098#endif
2099 }
2100 return cLabels;
2101}
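/*
 * Note: a minimal sketch (not compiled) of the forward-label pattern that ties
 * iemNativeLabelCreate, iemNativeAddFixup and iemNativeLabelDefine together.  The
 * label and fixup type values are placeholders picked for illustration and the
 * emitted branch is only simulated; real emitters use the fixup type matching the
 * branch instruction they actually generate.
 */
#if 0
static uint32_t iemNativeExampleForwardLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* 1. Create the label without a position (offWhere stays UINT32_MAX for now). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, UINT32_MAX, 0);
    /* 2. Emit the branch and record a fixup at its location so it can be patched later. */
    iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, 0);
    off += 1; /* pretend the branch instruction was emitted here */
    /* 3. Once the target position is known, pin the label to it. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif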
2102
2103
2104/**
2105 * Defines the location of an existing label.
2106 *
2107 * @param pReNative The native recompile state.
2108 * @param idxLabel The label to define.
2109 * @param offWhere The position.
2110 */
2111DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2112{
2113 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2114 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2115 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2116 pLabel->off = offWhere;
2117#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2118 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2119 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2120#endif
2121}
2122
2123
2124/**
2125 * Looks up a label.
2126 *
2127 * @returns Label ID if found, UINT32_MAX if not.
2128 */
2129static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2130 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2131{
2132 Assert((unsigned)enmType < 64);
2133 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2134 {
2135 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2136 return pReNative->aidxUniqueLabels[enmType];
2137
2138 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2139 uint32_t const cLabels = pReNative->cLabels;
2140 for (uint32_t i = 0; i < cLabels; i++)
2141 if ( paLabels[i].enmType == enmType
2142 && paLabels[i].uData == uData
2143 && ( paLabels[i].off == offWhere
2144 || offWhere == UINT32_MAX
2145 || paLabels[i].off == UINT32_MAX))
2146 return i;
2147 }
2148 return UINT32_MAX;
2149}
2150
2151
2152/**
2153 * Adds a fixup.
2154 *
2155 * @throws VBox status code (int) on failure.
2156 * @param pReNative The native recompile state.
2157 * @param offWhere The instruction offset of the fixup location.
2158 * @param idxLabel The target label ID for the fixup.
2159 * @param enmType The fixup type.
2160 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2161 */
2162DECL_HIDDEN_THROW(void)
2163iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2164 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2165{
2166 Assert(idxLabel <= UINT16_MAX);
2167 Assert((unsigned)enmType <= UINT8_MAX);
2168#ifdef RT_ARCH_ARM64
2169 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2170 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2171 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2172#endif
2173
2174 /*
2175 * Make sure we've got room.
2176 */
2177 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2178 uint32_t const cFixups = pReNative->cFixups;
2179 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2180 { /* likely */ }
2181 else
2182 {
2183 uint32_t cNew = pReNative->cFixupsAlloc;
2184 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2185 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2186 cNew *= 2;
2187 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2188 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2189 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2190 pReNative->paFixups = paFixups;
2191 pReNative->cFixupsAlloc = cNew;
2192 }
2193
2194 /*
2195 * Add the fixup.
2196 */
2197 paFixups[cFixups].off = offWhere;
2198 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2199 paFixups[cFixups].enmType = enmType;
2200 paFixups[cFixups].offAddend = offAddend;
2201 pReNative->cFixups = cFixups + 1;
2202}
2203
2204
2205/**
2206 * Slow code path for iemNativeInstrBufEnsure.
2207 */
2208DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2209{
2210 /* Double the buffer size till we meet the request. */
2211 uint32_t cNew = pReNative->cInstrBufAlloc;
2212 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2213 do
2214 cNew *= 2;
2215 while (cNew < off + cInstrReq);
2216
2217 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2218#ifdef RT_ARCH_ARM64
2219 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2220#else
2221 uint32_t const cbMaxInstrBuf = _2M;
2222#endif
2223 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2224
2225 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2226 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2227
2228#ifdef VBOX_STRICT
2229 pReNative->offInstrBufChecked = off + cInstrReq;
2230#endif
2231 pReNative->cInstrBufAlloc = cNew;
2232 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2233}
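/*
 * Note: a standalone sketch (plain C, not VBox code) of the growth policy used
 * above: double the allocation until the requested instruction range fits, subject
 * to the per-architecture ceiling noted in the function.
 */
#if 0
# include <stdint.h>
static uint32_t exampleGrowInstrBufSize(uint32_t cCurAlloc, uint32_t off, uint32_t cInstrReq, uint32_t cInstrMax)
{
    uint32_t cNew = cCurAlloc;
    do
        cNew *= 2;                          /* double... */
    while (cNew < off + cInstrReq);         /* ...until the request fits */
    return cNew <= cInstrMax ? cNew : 0;    /* 0 means the TB would exceed the limit */
}
#endif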
2234
2235#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2236
2237/**
2238 * Grows the static debug info array used during recompilation.
2239 *
2240 * @returns Pointer to the new debug info block; throws VBox status code on
2241 * failure, so no need to check the return value.
2242 */
2243DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2244{
2245 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2246 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2247 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2248 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2249 pReNative->pDbgInfo = pDbgInfo;
2250 pReNative->cDbgInfoAlloc = cNew;
2251 return pDbgInfo;
2252}
2253
2254
2255/**
2256 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2257 */
2258DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2259{
2260 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2261 { /* likely */ }
2262 else
2263 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2264 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2265}
2266
2267
2268/**
2269 * Debug Info: Adds a native offset record, if necessary.
2270 */
2271DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2272{
2273 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2274
2275 /*
2276 * Do we need this one?
2277 */
2278 uint32_t const offPrev = pDbgInfo->offNativeLast;
2279 if (offPrev == off)
2280 return;
2281 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2282
2283 /*
2284 * Add it.
2285 */
2286 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2287 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2288 pEntry->NativeOffset.offNative = off;
2289 pDbgInfo->offNativeLast = off;
2290}
2291
2292
2293/**
2294 * Debug Info: Record info about a label.
2295 */
2296static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2297{
2298 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2299 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2300 pEntry->Label.uUnused = 0;
2301 pEntry->Label.enmLabel = (uint8_t)enmType;
2302 pEntry->Label.uData = uData;
2303}
2304
2305
2306/**
2307 * Debug Info: Record info about a threaded call.
2308 */
2309static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2310{
2311 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2312 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2313 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2314 pEntry->ThreadedCall.uUnused = 0;
2315 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2316}
2317
2318
2319/**
2320 * Debug Info: Record info about a new guest instruction.
2321 */
2322static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2323{
2324 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2325 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2326 pEntry->GuestInstruction.uUnused = 0;
2327 pEntry->GuestInstruction.fExec = fExec;
2328}
2329
2330
2331/**
2332 * Debug Info: Record info about guest register shadowing.
2333 */
2334DECL_HIDDEN_THROW(void)
2335iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2336 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2337{
2338 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2339 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2340 pEntry->GuestRegShadowing.uUnused = 0;
2341 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2342 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2343 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2344#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2345 Assert( idxHstReg != UINT8_MAX
2346 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2347#endif
2348}
2349
2350
2351# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2352/**
2353 * Debug Info: Record info about guest SIMD register shadowing.
2354 */
2355DECL_HIDDEN_THROW(void)
2356iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2357 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2358{
2359 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2360 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2361 pEntry->GuestSimdRegShadowing.uUnused = 0;
2362 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2363 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2364 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2365}
2366# endif
2367
2368
2369# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2370/**
2371 * Debug Info: Record info about delayed RIP updates.
2372 */
2373DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2374{
2375 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2376 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2377 pEntry->DelayedPcUpdate.offPc = offPc;
2378 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2379}
2380# endif
2381
2382# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2383
2384/**
2385 * Debug Info: Record info about a dirty guest register.
2386 */
2387DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2388 uint8_t idxGstReg, uint8_t idxHstReg)
2389{
2390 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2391 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2392 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2393 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2394 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2395}
2396
2397
2398/**
2399 * Debug Info: Record info about a dirty guest register writeback operation.
2400 */
2401DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2402{
2403 unsigned const cBitsGstRegMask = 25;
2404 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2405
2406 /* The first block of 25 bits: */
2407 if (fGstReg & fGstRegMask)
2408 {
2409 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2410 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2411 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2412 pEntry->GuestRegWriteback.cShift = 0;
2413 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2414 fGstReg &= ~(uint64_t)fGstRegMask;
2415 if (!fGstReg)
2416 return;
2417 }
2418
2419 /* The second block of 25 bits: */
2420 fGstReg >>= cBitsGstRegMask;
2421 if (fGstReg & fGstRegMask)
2422 {
2423 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2424 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2425 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2426 pEntry->GuestRegWriteback.cShift = 1;
2427 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2428 fGstReg &= ~(uint64_t)fGstRegMask;
2429 if (!fGstReg)
2430 return;
2431 }
2432
2433 /* The last block with 14 bits: */
2434 fGstReg >>= cBitsGstRegMask;
2435 Assert(fGstReg & fGstRegMask);
2436 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2437 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2438 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2439 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2440 pEntry->GuestRegWriteback.cShift = 2;
2441 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2442}
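/*
 * Note: a standalone sketch (plain C, not VBox code) showing how a consumer can
 * reassemble the 64-bit mask from the records emitted above: each record carries a
 * chunk of up to 25 bits and cShift says which 25-bit slice (0, 1 or 2) of the
 * original mask it belongs to.
 */
#if 0
# include <stdint.h>
static uint64_t exampleReassembleWritebackMask(uint8_t const *pacShift, uint32_t const *pafChunk, unsigned cRecords)
{
    uint64_t fGstReg = 0;
    for (unsigned i = 0; i < cRecords; i++)
        fGstReg |= (uint64_t)pafChunk[i] << (pacShift[i] * 25); /* cShift is the chunk index */
    return fGstReg;
}
#endif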
2443
2444# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2445
2446#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2447
2448
2449/*********************************************************************************************************************************
2450* Register Allocator *
2451*********************************************************************************************************************************/
2452
2453/**
2454 * Register parameter indexes (indexed by argument number).
2455 */
2456DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2457{
2458 IEMNATIVE_CALL_ARG0_GREG,
2459 IEMNATIVE_CALL_ARG1_GREG,
2460 IEMNATIVE_CALL_ARG2_GREG,
2461 IEMNATIVE_CALL_ARG3_GREG,
2462#if defined(IEMNATIVE_CALL_ARG4_GREG)
2463 IEMNATIVE_CALL_ARG4_GREG,
2464# if defined(IEMNATIVE_CALL_ARG5_GREG)
2465 IEMNATIVE_CALL_ARG5_GREG,
2466# if defined(IEMNATIVE_CALL_ARG6_GREG)
2467 IEMNATIVE_CALL_ARG6_GREG,
2468# if defined(IEMNATIVE_CALL_ARG7_GREG)
2469 IEMNATIVE_CALL_ARG7_GREG,
2470# endif
2471# endif
2472# endif
2473#endif
2474};
2475AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2476
2477/**
2478 * Call register masks indexed by argument count.
2479 */
2480DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2481{
2482 0,
2483 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2484 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2485 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2486 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2487 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2488#if defined(IEMNATIVE_CALL_ARG4_GREG)
2489 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2490 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2491# if defined(IEMNATIVE_CALL_ARG5_GREG)
2492 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2493 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2494# if defined(IEMNATIVE_CALL_ARG6_GREG)
2495 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2496 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2497 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2498# if defined(IEMNATIVE_CALL_ARG7_GREG)
2499 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2500 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2501 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2502# endif
2503# endif
2504# endif
2505#endif
2506};
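/* Note: each g_afIemNativeCallRegs[cArgs] entry is simply the union of the first cArgs
   entries of g_aidxIemNativeCallRegs, e.g. g_afIemNativeCallRegs[2] equals
   RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG). */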
2507
2508#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2509/**
2510 * BP offset of the stack argument slots.
2511 *
2512 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2513 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2514 */
2515DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2516{
2517 IEMNATIVE_FP_OFF_STACK_ARG0,
2518# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2519 IEMNATIVE_FP_OFF_STACK_ARG1,
2520# endif
2521# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2522 IEMNATIVE_FP_OFF_STACK_ARG2,
2523# endif
2524# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2525 IEMNATIVE_FP_OFF_STACK_ARG3,
2526# endif
2527};
2528AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2529#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2530
2531/**
2532 * Info about shadowed guest register values.
2533 * @see IEMNATIVEGSTREG
2534 */
2535DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2536{
2537#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2538 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2539 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2540 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2541 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2542 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2543 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2544 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2545 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2546 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2547 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2548 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2549 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2550 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2551 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2552 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2553 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2554 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2555 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2556 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2557 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2558 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2559 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2560 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2561 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2562 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2563 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2564 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2565 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2566 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2567 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2568 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2569 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2570 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2571 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2572 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2573 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2574 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2575 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2576 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2577 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2578 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2579 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2580 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2581 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2582 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2583 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2584 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2585 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2586#undef CPUMCTX_OFF_AND_SIZE
2587};
2588AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2589
2590
2591/** Host CPU general purpose register names. */
2592DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2593{
2594#ifdef RT_ARCH_AMD64
2595 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2596#elif defined(RT_ARCH_ARM64)
2597 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2598 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2599#else
2600# error "port me"
2601#endif
2602};
2603
2604
2605#if 0 /* unused */
2606/**
2607 * Tries to locate a suitable register in the given register mask.
2608 *
2609 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2610 * failed.
2611 *
2612 * @returns Host register number on success, returns UINT8_MAX on failure.
2613 */
2614static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2615{
2616 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2617 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2618 if (fRegs)
2619 {
2620 /** @todo pick better here: */
2621 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2622
2623 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2624 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2625 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2626 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2627
2628 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2629 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2630 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2631 return idxReg;
2632 }
2633 return UINT8_MAX;
2634}
2635#endif /* unused */
2636
2637
2638#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2639/**
2640 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2641 *
2642 * @returns New code buffer offset on success, UINT32_MAX on failure.
2643 * @param pReNative The native recompile state.
2644 * @param off The current code buffer position.
2645 * @param enmGstReg The guest register to store to.
2646 * @param idxHstReg The host register to store from.
2647 */
2648DECL_FORCE_INLINE_THROW(uint32_t)
2649iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2650{
2651 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2652 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2653
2654 switch (g_aGstShadowInfo[enmGstReg].cb)
2655 {
2656 case sizeof(uint64_t):
2657 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2658 case sizeof(uint32_t):
2659 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2660 case sizeof(uint16_t):
2661 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2662#if 0 /* not present in the table. */
2663 case sizeof(uint8_t):
2664 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2665#endif
2666 default:
2667 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2668 }
2669}
2670
2671
2672/**
2673 * Emits code to flush a pending write of the given guest register if any.
2674 *
2675 * @returns New code buffer offset.
2676 * @param pReNative The native recompile state.
2677 * @param off Current code buffer position.
2678 * @param enmGstReg The guest register to flush.
2679 */
2680DECL_HIDDEN_THROW(uint32_t)
2681iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2682{
2683 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2684
2685 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2686 && enmGstReg <= kIemNativeGstReg_GprLast)
2687 || enmGstReg == kIemNativeGstReg_MxCsr);
2688 Assert( idxHstReg != UINT8_MAX
2689 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2690 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2691 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2692
2693 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2694
2695 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2696 return off;
2697}
2698
2699
2700/**
2701 * Flush the given set of guest registers if marked as dirty.
2702 *
2703 * @returns New code buffer offset.
2704 * @param pReNative The native recompile state.
2705 * @param off Current code buffer position.
2706 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2707 */
2708DECL_HIDDEN_THROW(uint32_t)
2709iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2710{
2711 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2712 if (bmGstRegShadowDirty)
2713 {
2714# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2715 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2716 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2717# endif
2718 do
2719 {
2720 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2721 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2722 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2723 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2724 } while (bmGstRegShadowDirty);
2725 }
2726
2727 return off;
2728}
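/*
 * Note: a standalone sketch (plain C, not VBox code) of the lowest-set-bit
 * enumeration pattern used by the flush loops above; __builtin_ctzll (GCC/Clang)
 * stands in for ASMBitFirstSetU64(x) - 1 on a non-zero mask.
 */
#if 0
# include <stdint.h>
static unsigned exampleEnumerateDirtyRegs(uint64_t bmDirty)
{
    unsigned cFlushed = 0;
    while (bmDirty)
    {
        unsigned const idxGstReg = (unsigned)__builtin_ctzll(bmDirty); /* index of the lowest set bit */
        bmDirty &= ~((uint64_t)1 << idxGstReg);                        /* clear it so the loop terminates */
        cFlushed++;                                                    /* a real flush stores the shadow register here */
    }
    return cFlushed;
}
#endif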
2729
2730
2731/**
2732 * Flush all shadowed guest registers marked as dirty for the given host register.
2733 *
2734 * @returns New code buffer offset.
2735 * @param pReNative The native recompile state.
2736 * @param off Current code buffer position.
2737 * @param idxHstReg The host register.
2738 *
2739 * @note This doesn't do any unshadowing of guest registers from the host register.
2740 */
2741DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
2742{
2743 /* We need to flush any pending guest register writes this host register shadows. */
2744 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2745 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
2746 {
2747# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2748 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2749 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
2750# endif
2751 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
2752 * likely to only have a single bit set. It'll be in the 0..15 range,
2753 * but still it's 15 unnecessary loops for the last guest register. */
2754
2755 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
2756 do
2757 {
2758 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2759 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2760 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2761 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2762 } while (bmGstRegShadowDirty);
2763 }
2764
2765 return off;
2766}
2767#endif
2768
2769
2770/**
2771 * Locate a register, possibly freeing one up.
2772 *
2773 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2774 * failed.
2775 *
2776 * @returns Host register number on success. Returns UINT8_MAX if no registers
2777 * are found; the caller is supposed to deal with this and raise an
2778 * allocation type specific status code (if desired).
2779 *
2780 * @throws VBox status code if we run into trouble spilling a variable or
2781 * recording debug info. Does NOT throw anything if we're out of
2782 * registers, though.
2783 */
2784static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2785 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2786{
2787 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
2788 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2789 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2790
2791 /*
2792 * Try a freed register that's shadowing a guest register.
2793 */
2794 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2795 if (fRegs)
2796 {
2797 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
2798
2799#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
2800 /*
2801 * When we have liveness information, we use it to kick out all shadowed
2802 * guest registers that will not be needed any more in this TB. If we're
2803 * lucky, this may prevent us from ending up here again.
2804 *
2805 * Note! We must consider the previous entry here so we don't free
2806 * anything that the current threaded function requires (current
2807 * entry is produced by the next threaded function).
2808 */
2809 uint32_t const idxCurCall = pReNative->idxCurCall;
2810 if (idxCurCall > 0)
2811 {
2812 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
2813
2814# ifndef IEMLIVENESS_EXTENDED_LAYOUT
2815 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
2816 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
2817 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
2818#else
2819 /* Construct a mask of the registers not in the read or write state.
2820 Note! We could skip writes, if they aren't from us, as this is just
2821 a hack to prevent trashing registers that have just been written
2822 or will be written when we retire the current instruction. */
2823 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
2824 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
2825 & IEMLIVENESSBIT_MASK;
2826#endif
2827 /* Merge EFLAGS. */
2828 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
2829 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
2830 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
2831 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
2832 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
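            /* In other words: the three shift-and-AND folds above collapse the seven EFLAGS
               liveness bits (the 'Other' part plus CF, PF, AF, ZF, SF and OF) into the single
               kIemNativeGstReg_EFlags bit, so the EFLAGS shadow is only freed when every one
               of its parts is unused. */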
2833
2834 /* If it matches any shadowed registers. */
2835 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
2836 {
2837#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2838 /* Writeback any dirty shadow registers we are about to unshadow. */
2839 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
2840#endif
2841
2842 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
2843 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
2844 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
2845
2846 /* See if we've got any unshadowed registers we can return now. */
2847 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
2848 if (fUnshadowedRegs)
2849 {
2850 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
2851 return (fPreferVolatile
2852 ? ASMBitFirstSetU32(fUnshadowedRegs)
2853 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2854 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
2855 - 1;
2856 }
2857 }
2858 }
2859#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
2860
2861 unsigned const idxReg = (fPreferVolatile
2862 ? ASMBitFirstSetU32(fRegs)
2863 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2864 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
2865 - 1;
2866
2867 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2868 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2869 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2870 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2871
2872#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2873 /* We need to flush any pending guest register writes this host register shadows. */
2874 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
2875#endif
2876
2877 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2878 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2879 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2880 return idxReg;
2881 }
2882
2883 /*
2884 * Try to free up a variable that's in a register.
2885 *
2886 * saved on the stack, then in the second round we move things to the stack.
2887 * saved on the stack, then in the second round move things to the stack.
2888 */
2889 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
2890 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2891 {
2892 uint32_t fVars = pReNative->Core.bmVars;
2893 while (fVars)
2894 {
2895 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2896 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2897#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2898 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
2899 { fVars &= ~RT_BIT_32(idxVar); continue; } /* must clear the bit first or we would loop on this SIMD variable forever */
2900#endif
2901
2902 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2903 && (RT_BIT_32(idxReg) & fRegMask)
2904 && ( iLoop == 0
2905 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2906 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2907 && !pReNative->Core.aVars[idxVar].fRegAcquired)
2908 {
2909 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2910 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2911 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2912 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2913 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2914 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2915#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2916 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2917#endif
2918
2919 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2920 {
2921 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
2922 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2923 }
2924
2925 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2926 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2927
2928 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2929 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2930 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2931 return idxReg;
2932 }
2933 fVars &= ~RT_BIT_32(idxVar);
2934 }
2935 }
2936
2937 return UINT8_MAX;
2938}
2939
2940
2941/**
2942 * Reassigns a variable to a different register specified by the caller.
2943 *
2944 * @returns The new code buffer position.
2945 * @param pReNative The native recompile state.
2946 * @param off The current code buffer position.
2947 * @param idxVar The variable index.
2948 * @param idxRegOld The old host register number.
2949 * @param idxRegNew The new host register number.
2950 * @param pszCaller The caller for logging.
2951 */
2952static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2953 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2954{
2955 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2956 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
2957#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2958 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
2959#endif
2960 RT_NOREF(pszCaller);
2961
2962#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2963 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2964#endif
2965 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2966
2967 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2968#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2969 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2970#endif
2971 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
2972 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2973 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2974
2975 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2976 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2977 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2978 if (fGstRegShadows)
2979 {
2980 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
2981 | RT_BIT_32(idxRegNew);
2982 while (fGstRegShadows)
2983 {
2984 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2985 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2986
2987 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2988 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2989 }
2990 }
2991
2992 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
2993 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2994 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2995 return off;
2996}
2997
2998
2999/**
3000 * Moves a variable to a different register or spills it onto the stack.
3001 *
3002 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3003 * kinds can easily be recreated if needed later.
3004 *
3005 * @returns The new code buffer position.
3006 * @param pReNative The native recompile state.
3007 * @param off The current code buffer position.
3008 * @param idxVar The variable index.
3009 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3010 * call-volatile registers.
3011 */
3012DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3013 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3014{
3015 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3016 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3017 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3018 Assert(!pVar->fRegAcquired);
3019
3020 uint8_t const idxRegOld = pVar->idxReg;
3021 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3022 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3023 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3024 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3025 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3026 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3027 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3028 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3029#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3030 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3031#endif
3032
3033
3034 /** @todo Add statistics on this.*/
3035 /** @todo Implement basic variable liveness analysis (python) so variables
3036 * can be freed immediately once no longer used. Without it we risk
3037 * trashing registers and stack slots for dead variables.
3038 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3039
3040 /*
3041 * First try move it to a different register, as that's cheaper.
3042 */
3043 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3044 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3045 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3046 if (fRegs)
3047 {
3048 /* Avoid using shadow registers, if possible. */
3049 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3050 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3051 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3052 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3053 }
3054
3055 /*
3056 * Otherwise we must spill the register onto the stack.
3057 */
3058 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3059 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3060 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3061 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3062
3063 pVar->idxReg = UINT8_MAX;
3064 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3065 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3066 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3067 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3068 return off;
3069}
3070
3071
3072/**
3073 * Allocates a temporary host general purpose register.
3074 *
3075 * This may emit code to save register content onto the stack in order to free
3076 * up a register.
3077 *
3078 * @returns The host register number; throws VBox status code on failure,
3079 * so no need to check the return value.
3080 * @param pReNative The native recompile state.
3081 * @param poff Pointer to the variable with the code buffer position.
3082 * This will be updated if we need to move a variable from
3083 * register to stack in order to satisfy the request.
3084 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3085 * registers (@c true, default) or the other way around
3086 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3087 */
3088DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3089{
3090 /*
3091 * Try to find a completely unused register, preferably a call-volatile one.
3092 */
3093 uint8_t idxReg;
3094 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3095 & ~pReNative->Core.bmHstRegsWithGstShadow
3096 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3097 if (fRegs)
3098 {
3099 if (fPreferVolatile)
3100 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3101 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3102 else
3103 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3104 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3105 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3106 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3107 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3108 }
3109 else
3110 {
3111 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3112 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3113 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3114 }
3115 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3116}
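
/*
 * Illustrative usage sketch (editorial addition, disabled): shows the basic
 * iemNativeRegAllocTmp / iemNativeRegFreeTmp pairing.  The wrapper name
 * iemNativeExampleTmpRegUsage and the constant loaded are made up; real
 * callers are the emitter functions elsewhere in the recompiler.
 */
#if 0
static uint32_t iemNativeExampleTmpRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Allocate a scratch GPR; this may emit spill code and advance the offset via the pointer. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);

    /* Use it, e.g. load a constant (any emitter from IEMN8veRecompilerEmit.h would do). */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));

    /* Hand it back; any guest shadows it happens to hold are left alone. */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif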
3117
3118
3119/**
3120 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3121 * registers.
3122 *
3123 * @returns The host register number; throws VBox status code on failure,
3124 * so no need to check the return value.
3125 * @param pReNative The native recompile state.
3126 * @param poff Pointer to the variable with the code buffer position.
3127 * This will be updated if we need to move a variable from
3128 * register to stack in order to satisfy the request.
3129 * @param fRegMask Mask of acceptable registers.
3130 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3131 * registers (@c true, default) or the other way around
3132 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3133 */
3134DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3135 bool fPreferVolatile /*= true*/)
3136{
3137 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3138 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3139
3140 /*
3141 * Try to find a completely unused register, preferably a call-volatile one.
3142 */
3143 uint8_t idxReg;
3144 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3145 & ~pReNative->Core.bmHstRegsWithGstShadow
3146 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3147 & fRegMask;
3148 if (fRegs)
3149 {
3150 if (fPreferVolatile)
3151 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3152 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3153 else
3154 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3155 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3156 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3157 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3158 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3159 }
3160 else
3161 {
3162 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3163 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3164 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3165 }
3166 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3167}
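
/*
 * Illustrative usage sketch (editorial addition, disabled): requesting a
 * temporary from a restricted register set, here excluding the call-volatile
 * GPRs so the value survives a helper call.  The wrapper name is made up.
 */
#if 0
static uint32_t iemNativeExampleTmpRegMasked(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint32_t const fRegMask   = IEMNATIVE_HST_GREG_MASK
                              & ~IEMNATIVE_REG_FIXED_MASK
                              & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    uint8_t const  idxSafeReg = iemNativeRegAllocTmpEx(pReNative, &off, fRegMask, false /*fPreferVolatile*/);

    /* ... emit a call and code that keeps using idxSafeReg afterwards ... */

    iemNativeRegFreeTmp(pReNative, idxSafeReg);
    return off;
}
#endif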
3168
3169
3170/**
3171 * Allocates a temporary register for loading an immediate value into.
3172 *
3173 * This will emit code to load the immediate, unless there happens to be an
3174 * unused register with the value already loaded.
3175 *
3176 * The caller will not modify the returned register, it must be considered
3177 * read-only. Free using iemNativeRegFreeTmpImm.
3178 *
3179 * @returns The host register number; throws VBox status code on failure, so no
3180 * need to check the return value.
3181 * @param pReNative The native recompile state.
3182 * @param poff Pointer to the variable with the code buffer position.
3183 * @param uImm The immediate value that the register must hold upon
3184 * return.
3185 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3186 * registers (@c true, default) or the other way around
3187 * (@c false).
3188 *
3189 * @note Reusing immediate values has not been implemented yet.
3190 */
3191DECL_HIDDEN_THROW(uint8_t)
3192iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3193{
3194 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3195 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3196 return idxReg;
3197}
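
/*
 * Illustrative usage sketch (editorial addition, disabled): the immediate
 * register is read-only for the caller and is returned via
 * iemNativeRegFreeTmpImm.  The wrapper name and values are made up.
 */
#if 0
static uint32_t iemNativeExampleTmpImmUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Code to load the constant is emitted here (no reuse of existing values yet). */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));

    /* Use it as a source operand only, e.g. copy it into another temporary. */
    uint8_t const idxRegDst = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegDst, idxRegImm);

    iemNativeRegFreeTmp(pReNative, idxRegDst);
    iemNativeRegFreeTmpImm(pReNative, idxRegImm); /* assumes the register was not modified */
    return off;
}
#endif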
3198
3199
3200/**
3201 * Allocates a temporary host general purpose register for keeping a guest
3202 * register value.
3203 *
3204 * Since we may already have a register holding the guest register value,
3205 * code will be emitted to do the loading if that's not the case. Code may also
3206 * be emitted if we have to free up a register to satisfy the request.
3207 *
3208 * @returns The host register number; throws VBox status code on failure, so no
3209 * need to check the return value.
3210 * @param pReNative The native recompile state.
3211 * @param poff Pointer to the variable with the code buffer
3212 * position. This will be updated if we need to move a
3213 * variable from register to stack in order to satisfy
3214 * the request.
3215 * @param enmGstReg The guest register that is to be updated.
3216 * @param enmIntendedUse How the caller will be using the host register.
3217 * @param fNoVolatileRegs Set if no volatile register is allowed, clear if any
3218 * register is okay (default). The ASSUMPTION here is
3219 * that the caller has already flushed all volatile
3220 * registers, so this is only applied if we allocate a
3221 * new register.
3222 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3223 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3224 */
3225DECL_HIDDEN_THROW(uint8_t)
3226iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3227 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3228 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3229{
3230 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3231#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3232 AssertMsg( fSkipLivenessAssert
3233 || pReNative->idxCurCall == 0
3234 || enmGstReg == kIemNativeGstReg_Pc
3235 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3236 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3237 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3238 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3239 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3240 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3241#endif
3242 RT_NOREF(fSkipLivenessAssert);
3243#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3244 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3245#endif
3246 uint32_t const fRegMask = !fNoVolatileRegs
3247 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3248 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3249
3250 /*
3251 * First check if the guest register value is already in a host register.
3252 */
3253 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3254 {
3255 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3256 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3257 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3258 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3259
3260 /* It's not supposed to be allocated... */
3261 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3262 {
3263 /*
3264 * If the register will trash the guest shadow copy, try to find a
3265 * completely unused register we can use instead. If that fails,
3266 * we need to disassociate the host reg from the guest reg.
3267 */
3268 /** @todo would be nice to know if preserving the register is in any way helpful. */
3269 /* If the purpose is calculations, try to duplicate the register value as
3270 we'll be clobbering the shadow. */
3271 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3272 && ( ~pReNative->Core.bmHstRegs
3273 & ~pReNative->Core.bmHstRegsWithGstShadow
3274 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3275 {
3276 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3277
3278 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3279
3280 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3281 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3282 g_apszIemNativeHstRegNames[idxRegNew]));
3283 idxReg = idxRegNew;
3284 }
3285 /* If the current register matches the restrictions, go ahead and allocate
3286 it for the caller. */
3287 else if (fRegMask & RT_BIT_32(idxReg))
3288 {
3289 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3290 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3291 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3292 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3293 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3294 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3295 else
3296 {
3297 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3298 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3299 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3300 }
3301 }
3302 /* Otherwise, allocate a register that satisfies the caller and transfer
3303 the shadowing if compatible with the intended use. (This basically
3304 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3305 else
3306 {
3307 Assert(fNoVolatileRegs);
3308 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3309 !fNoVolatileRegs
3310 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3311 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3312 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3313 {
3314 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3315 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3316 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3317 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3318 }
3319 else
3320 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3321 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3322 g_apszIemNativeHstRegNames[idxRegNew]));
3323 idxReg = idxRegNew;
3324 }
3325 }
3326 else
3327 {
3328 /*
3329 * Oops. Shadowed guest register already allocated!
3330 *
3331 * Allocate a new register, copy the value and, if updating, the
3332 * guest shadow copy assignment to the new register.
3333 */
3334 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3335 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3336 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3337 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3338
3339 /** @todo share register for readonly access. */
3340 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3341 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3342
3343 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3344 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3345
3346 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3347 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3348 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3349 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3350 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3351 else
3352 {
3353 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3354 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3355 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3356 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3357 }
3358 idxReg = idxRegNew;
3359 }
3360 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3361
3362#ifdef VBOX_STRICT
3363 /* Strict builds: Check that the value is correct. */
3364 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3365#endif
3366
3367#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3368 /** @todo r=aeichner Implement for registers other than GPR as well. */
3369 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3370 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3371 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3372 && enmGstReg <= kIemNativeGstReg_GprLast)
3373 || enmGstReg == kIemNativeGstReg_MxCsr))
3374 {
3375# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3376 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3377 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3378# endif
3379 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3380 }
3381#endif
3382
3383 return idxReg;
3384 }
3385
3386 /*
3387 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
3388 */
3389 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3390
3391 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3392 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3393
3394 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3395 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3396 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3397 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3398
3399#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3400 /** @todo r=aeichner Implement for registers other than GPR as well. */
3401 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3402 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3403 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3404 && enmGstReg <= kIemNativeGstReg_GprLast)
3405 || enmGstReg == kIemNativeGstReg_MxCsr))
3406 {
3407# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3408 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3409 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3410# endif
3411 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3412 }
3413#endif
3414
3415 return idxRegNew;
3416}
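
/*
 * Illustrative usage sketch (editorial addition, disabled): read-modify-write
 * access to a guest GPR shadow.  The wrapper name is made up and the guest
 * register chosen (the first GPR) is purely for illustration.
 */
#if 0
static uint32_t iemNativeExampleGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get a host register shadowing the guest register; loads it if no shadow exists yet. */
    uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
                                                           kIemNativeGstRegUse_ForUpdate);

    /* ... emit code modifying idxReg; with delayed writeback the shadow is now marked dirty ... */

    iemNativeRegFreeTmp(pReNative, idxReg);
    return off;
}
#endif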
3417
3418
3419/**
3420 * Allocates a temporary host general purpose register that already holds the
3421 * given guest register value.
3422 *
3423 * The use case for this function is places where the shadowing state cannot be
3424 * modified due to branching and such. This will fail if we don't have a
3425 * current shadow copy handy or if it's incompatible. The only code that will
3426 * be emitted here is value checking code in strict builds.
3427 *
3428 * The intended use can only be readonly!
3429 *
3430 * @returns The host register number, UINT8_MAX if not present.
3431 * @param pReNative The native recompile state.
3432 * @param poff Pointer to the instruction buffer offset.
3433 * Will be updated in strict builds if a register is
3434 * found.
3435 * @param enmGstReg The guest register that is to be updated.
3436 * @note In strict builds, this may throw instruction buffer growth failures.
3437 * Non-strict builds will not throw anything.
3438 * @sa iemNativeRegAllocTmpForGuestReg
3439 */
3440DECL_HIDDEN_THROW(uint8_t)
3441iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3442{
3443 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3444#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3445 AssertMsg( pReNative->idxCurCall == 0
3446 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3447 || enmGstReg == kIemNativeGstReg_Pc,
3448 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3449#endif
3450
3451 /*
3452 * First check if the guest register value is already in a host register.
3453 */
3454 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3455 {
3456 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3457 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3458 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3459 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3460
3461 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3462 {
3463 /*
3464 * We only do readonly use here, so easy compared to the other
3465 * variant of this code.
3466 */
3467 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3468 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3469 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3470 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3471 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3472
3473#ifdef VBOX_STRICT
3474 /* Strict builds: Check that the value is correct. */
3475 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3476#else
3477 RT_NOREF(poff);
3478#endif
3479 return idxReg;
3480 }
3481 }
3482
3483 return UINT8_MAX;
3484}
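
/*
 * Illustrative usage sketch (editorial addition, disabled): opportunistic,
 * read-only access that must not disturb the shadowing state.  The wrapper
 * name is made up.
 */
#if 0
static uint32_t iemNativeExampleGuestRegIfPresent(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxReg != UINT8_MAX)
    {
        /* ... emit code that only reads idxReg ... */
        iemNativeRegFreeTmp(pReNative, idxReg);
    }
    /* else: fall back to a path that does not need the value in a register. */
    return off;
}
#endif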
3485
3486
3487/**
3488 * Allocates argument registers for a function call.
3489 *
3490 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3491 * need to check the return value.
3492 * @param pReNative The native recompile state.
3493 * @param off The current code buffer offset.
3494 * @param cArgs The number of arguments the function call takes.
3495 */
3496DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3497{
3498 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3499 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3500 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3501 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3502
3503 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3504 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3505 else if (cArgs == 0)
3506 return off;
3507
3508 /*
3509 * Do we get lucky and all registers are free and not shadowing anything?
3510 */
3511 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3512 for (uint32_t i = 0; i < cArgs; i++)
3513 {
3514 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3515 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3516 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3517 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3518 }
3519 /*
3520 * Okay, not lucky so we have to free up the registers.
3521 */
3522 else
3523 for (uint32_t i = 0; i < cArgs; i++)
3524 {
3525 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3526 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3527 {
3528 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3529 {
3530 case kIemNativeWhat_Var:
3531 {
3532 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3534 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3535 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3536 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3537#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3538 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3539#endif
3540
3541 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3542 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3543 else
3544 {
3545 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3546 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3547 }
3548 break;
3549 }
3550
3551 case kIemNativeWhat_Tmp:
3552 case kIemNativeWhat_Arg:
3553 case kIemNativeWhat_rc:
3554 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3555 default:
3556 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3557 }
3558
3559 }
3560 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3561 {
3562 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3563 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3564 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3565#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3566 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3567#endif
3568 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3569 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3570 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3571 }
3572 else
3573 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3574 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3575 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3576 }
3577 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3578 return off;
3579}
3580
3581
3582DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3583
3584
3585#if 0
3586/**
3587 * Frees a register assignment of any type.
3588 *
3589 * @param pReNative The native recompile state.
3590 * @param idxHstReg The register to free.
3591 *
3592 * @note Does not update variables.
3593 */
3594DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3595{
3596 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3597 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3598 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3599 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3600 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3601 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3602 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3603 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3604 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3605 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3606 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3607 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3608 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3609 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3610
3611 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3612 /* no flushing, right:
3613 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3614 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3615 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3616 */
3617}
3618#endif
3619
3620
3621/**
3622 * Frees a temporary register.
3623 *
3624 * Any shadow copies of guest registers assigned to the host register will not
3625 * be flushed by this operation.
3626 */
3627DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3628{
3629 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3630 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3631 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3632 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3633 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3634}
3635
3636
3637/**
3638 * Frees a temporary immediate register.
3639 *
3640 * It is assumed that the caller has not modified the register, so it still holds
3641 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3642 */
3643DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3644{
3645 iemNativeRegFreeTmp(pReNative, idxHstReg);
3646}
3647
3648
3649/**
3650 * Frees a register assigned to a variable.
3651 *
3652 * The register will be disassociated from the variable.
3653 */
3654DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3655{
3656 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3657 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3658 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3659 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3660 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3661#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3662 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3663#endif
3664
3665 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3666 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3667 if (!fFlushShadows)
3668 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3669 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3670 else
3671 {
3672 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3673 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3674#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3675 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3676#endif
3677 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3678 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3679 uint64_t fGstRegShadows = fGstRegShadowsOld;
3680 while (fGstRegShadows)
3681 {
3682 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3683 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3684
3685 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3686 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3687 }
3688 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3689 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3690 }
3691}
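
/*
 * Illustrative usage sketch (editorial addition, disabled): the fFlushShadows
 * parameter decides whether guest shadow copies held by the register survive
 * the disassociation.  The wrapper name is made up.
 */
#if 0
static void iemNativeExampleFreeVarReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg)
{
    /* Detach the register from its variable but keep any guest shadows it holds,
       so a later iemNativeRegAllocTmpForGuestReg() can reuse them without reloading: */
    iemNativeRegFreeVar(pReNative, idxHstReg, false /*fFlushShadows*/);
    /* Pass true instead to drop the shadow copies as well. */
}
#endif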
3692
3693
3694#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3695# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
3696/** Host CPU SIMD register names. */
3697DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3698{
3699# ifdef RT_ARCH_AMD64
3700 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3701# elif RT_ARCH_ARM64
3702 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3703 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3704# else
3705# error "port me"
3706# endif
3707};
3708# endif
3709
3710
3711/**
3712 * Frees a SIMD register assigned to a variable.
3713 *
3714 * The register will be disassociated from the variable.
3715 */
3716DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3717{
3718 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3719 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3720 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3722 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3723 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3724
3725 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3726 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3727 if (!fFlushShadows)
3728 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3729 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3730 else
3731 {
3732 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3733 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3734 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
3735 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
3736 uint64_t fGstRegShadows = fGstRegShadowsOld;
3737 while (fGstRegShadows)
3738 {
3739 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3740 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3741
3742 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
3743 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
3744 }
3745 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3746 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3747 }
3748}
3749
3750
3751/**
3752 * Reassigns a variable to a different SIMD register specified by the caller.
3753 *
3754 * @returns The new code buffer position.
3755 * @param pReNative The native recompile state.
3756 * @param off The current code buffer position.
3757 * @param idxVar The variable index.
3758 * @param idxRegOld The old host register number.
3759 * @param idxRegNew The new host register number.
3760 * @param pszCaller The caller for logging.
3761 */
3762static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3763 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3764{
3765 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3766 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3767 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3768 RT_NOREF(pszCaller);
3769
3770 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3771 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
3772 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
3773
3774 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3775 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3776 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3777
3778 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3779 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
3781
3782 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
3783 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
3784 else
3785 {
3786 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
3787 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
3788 }
3789
3790 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3791 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3792 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
3793 if (fGstRegShadows)
3794 {
3795 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3796 | RT_BIT_32(idxRegNew);
3797 while (fGstRegShadows)
3798 {
3799 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3800 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3801
3802 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
3803 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
3804 }
3805 }
3806
3807 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3808 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3809 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
3810 return off;
3811}
3812
3813
3814/**
3815 * Moves a variable to a different register or spills it onto the stack.
3816 *
3817 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3818 * kinds can easily be recreated if needed later.
3819 *
3820 * @returns The new code buffer position.
3821 * @param pReNative The native recompile state.
3822 * @param off The current code buffer position.
3823 * @param idxVar The variable index.
3824 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3825 * call-volatile registers.
3826 */
3827DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3828 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
3829{
3830 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3831 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3832 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3833 Assert(!pVar->fRegAcquired);
3834 Assert(!pVar->fSimdReg);
3835
3836 uint8_t const idxRegOld = pVar->idxReg;
3837 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
3838 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
3839 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3840 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
3841 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
3842 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3843 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
3844 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3845 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3846 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3847
3848 /** @todo Add statistics on this.*/
3849 /** @todo Implement basic variable liveness analysis (python) so variables
3850 * can be freed immediately once no longer used. Without it we risk
3851 * trashing registers and stack slots for dead variables.
3852 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3853
3854 /*
3855 * First try move it to a different register, as that's cheaper.
3856 */
3857 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3858 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
3859 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
3860 if (fRegs)
3861 {
3862 /* Avoid using shadow registers, if possible. */
3863 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
3864 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
3865 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3866 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
3867 }
3868
3869 /*
3870 * Otherwise we must spill the register onto the stack.
3871 */
3872 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3873 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3874 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3875
3876 if (pVar->cbVar == sizeof(RTUINT128U))
3877 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3878 else
3879 {
3880 Assert(pVar->cbVar == sizeof(RTUINT256U));
3881 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3882 }
3883
3884 pVar->idxReg = UINT8_MAX;
3885 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3886 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
3887 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3888 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3889 return off;
3890}
3891
3892
3893/**
3894 * Called right before emitting a call instruction to move anything important
3895 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
3896 * optionally freeing argument variables.
3897 *
3898 * @returns New code buffer offset, UINT32_MAX on failure.
3899 * @param pReNative The native recompile state.
3900 * @param off The code buffer offset.
3901 * @param cArgs The number of arguments the function call takes.
3902 * It is presumed that the host register part of these have
3903 * been allocated as such already and won't need moving,
3904 * just freeing.
3905 * @param fKeepVars Mask of variables that should keep their register
3906 * assignments. Caller must take care to handle these.
3907 */
3908DECL_HIDDEN_THROW(uint32_t)
3909iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
3910{
3911 Assert(!cArgs); RT_NOREF(cArgs);
3912
3913 /* fKeepVars will reduce this mask. */
3914 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
3915
3916 /*
3917 * Move anything important out of volatile registers.
3918 */
3919 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
3920#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3921 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
3922#endif
3923 ;
3924
3925 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
3926 if (!fSimdRegsToMove)
3927 { /* likely */ }
3928 else
3929 {
3930 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
3931 while (fSimdRegsToMove != 0)
3932 {
3933 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
3934 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
3935
3936 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
3937 {
3938 case kIemNativeWhat_Var:
3939 {
3940 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
3941 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3942 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3943 Assert(pVar->idxReg == idxSimdReg);
3944 Assert(pVar->fSimdReg);
3945 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
3946 {
3947 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
3948 idxVar, pVar->enmKind, pVar->idxReg));
3949 if (pVar->enmKind != kIemNativeVarKind_Stack)
3950 pVar->idxReg = UINT8_MAX;
3951 else
3952 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
3953 }
3954 else
3955 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
3956 continue;
3957 }
3958
3959 case kIemNativeWhat_Arg:
3960 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
3961 continue;
3962
3963 case kIemNativeWhat_rc:
3964 case kIemNativeWhat_Tmp:
3965 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
3966 continue;
3967
3968 case kIemNativeWhat_FixedReserved:
3969#ifdef RT_ARCH_ARM64
3970 continue; /* On ARM the upper half of the virtual 256-bit register. */
3971#endif
3972
3973 case kIemNativeWhat_FixedTmp:
3974 case kIemNativeWhat_pVCpuFixed:
3975 case kIemNativeWhat_pCtxFixed:
3976 case kIemNativeWhat_PcShadow:
3977 case kIemNativeWhat_Invalid:
3978 case kIemNativeWhat_End:
3979 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3980 }
3981 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3982 }
3983 }
3984
3985 /*
3986 * Do the actual freeing.
3987 */
3988 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
3989 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
3990 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
3991 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
3992
3993 /* If there are guest register shadows in any call-volatile register, we
3994 have to clear the corresponding guest register masks for each register. */
3995 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
3996 if (fHstSimdRegsWithGstShadow)
3997 {
3998 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3999 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4000 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4001 do
4002 {
4003 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4004 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4005
4006 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4007
4008#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4009 /*
4010 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4011 * to call volatile registers).
4012 */
4013 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4014 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4015 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4016#endif
4017 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4018 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4019
4020 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4021 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4022 } while (fHstSimdRegsWithGstShadow != 0);
4023 }
4024
4025 return off;
4026}
4027#endif
4028
4029
4030/**
4031 * Called right before emitting a call instruction to move anything important
4032 * out of call-volatile registers, free and flush the call-volatile registers,
4033 * optionally freeing argument variables.
4034 *
4035 * @returns New code buffer offset, UINT32_MAX on failure.
4036 * @param pReNative The native recompile state.
4037 * @param off The code buffer offset.
4038 * @param cArgs The number of arguments the function call takes.
4039 * It is presumed that the host register part of these have
4040 * been allocated as such already and won't need moving,
4041 * just freeing.
4042 * @param fKeepVars Mask of variables that should keep their register
4043 * assignments. Caller must take care to handle these.
4044 */
4045DECL_HIDDEN_THROW(uint32_t)
4046iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4047{
4048 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4049
4050 /* fKeepVars will reduce this mask. */
4051 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4052
4053 /*
4054 * Move anything important out of volatile registers.
4055 */
4056 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4057 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4058 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4059#ifdef IEMNATIVE_REG_FIXED_TMP0
4060 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4061#endif
4062#ifdef IEMNATIVE_REG_FIXED_TMP1
4063 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4064#endif
4065#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4066 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4067#endif
4068 & ~g_afIemNativeCallRegs[cArgs];
4069
4070 fRegsToMove &= pReNative->Core.bmHstRegs;
4071 if (!fRegsToMove)
4072 { /* likely */ }
4073 else
4074 {
4075 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4076 while (fRegsToMove != 0)
4077 {
4078 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4079 fRegsToMove &= ~RT_BIT_32(idxReg);
4080
4081 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4082 {
4083 case kIemNativeWhat_Var:
4084 {
4085 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4086 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4087 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4088 Assert(pVar->idxReg == idxReg);
4089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4090 Assert(!pVar->fSimdReg);
4091#endif
4092 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4093 {
4094 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4095 idxVar, pVar->enmKind, pVar->idxReg));
4096 if (pVar->enmKind != kIemNativeVarKind_Stack)
4097 pVar->idxReg = UINT8_MAX;
4098 else
4099 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4100 }
4101 else
4102 fRegsToFree &= ~RT_BIT_32(idxReg);
4103 continue;
4104 }
4105
4106 case kIemNativeWhat_Arg:
4107 AssertMsgFailed(("What?!?: %u\n", idxReg));
4108 continue;
4109
4110 case kIemNativeWhat_rc:
4111 case kIemNativeWhat_Tmp:
4112 AssertMsgFailed(("Missing free: %u\n", idxReg));
4113 continue;
4114
4115 case kIemNativeWhat_FixedTmp:
4116 case kIemNativeWhat_pVCpuFixed:
4117 case kIemNativeWhat_pCtxFixed:
4118 case kIemNativeWhat_PcShadow:
4119 case kIemNativeWhat_FixedReserved:
4120 case kIemNativeWhat_Invalid:
4121 case kIemNativeWhat_End:
4122 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4123 }
4124 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4125 }
4126 }
4127
4128 /*
4129 * Do the actual freeing.
4130 */
4131 if (pReNative->Core.bmHstRegs & fRegsToFree)
4132 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4133 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4134 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4135
4136 /* If there are guest register shadows in any call-volatile register, we
4137 have to clear the corresponding guest register masks for each register. */
4138 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4139 if (fHstRegsWithGstShadow)
4140 {
4141 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4142 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4143 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4144 do
4145 {
4146 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4147 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4148
4149 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4150
4151#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4152 /*
4153 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4154 * to call volatile registers).
4155 */
4156 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4157 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4158 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4159#endif
4160
4161 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4162 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4163 } while (fHstRegsWithGstShadow != 0);
4164 }
4165
4166#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4167 /* Now for the SIMD registers, no argument support for now. */
4168 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4169#endif
4170
4171 return off;
4172}
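
/*
 * Illustrative usage sketch (editorial addition, disabled): typical sequence
 * right before emitting a helper call with two register arguments.  The
 * wrapper name is made up; argument loading and the call emission itself are
 * elided.
 */
#if 0
static uint32_t iemNativeExampleBeforeHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Spill/flush everything living in call-volatile registers, except the two
       already-allocated argument registers which only need freeing. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/, 0 /*fKeepVars*/);

    /* ... load the argument registers and emit the actual call here ... */
    return off;
}
#endif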
4173
4174
4175/**
4176 * Flushes a set of guest register shadow copies.
4177 *
4178 * This is usually done after calling a threaded function or a C-implementation
4179 * of an instruction.
4180 *
4181 * @param pReNative The native recompile state.
4182 * @param fGstRegs Set of guest registers to flush.
4183 */
4184DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4185{
4186 /*
4187 * Reduce the mask by what's currently shadowed
4188 */
4189 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4190 fGstRegs &= bmGstRegShadowsOld;
4191 if (fGstRegs)
4192 {
4193 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4194 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4195 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4196 if (bmGstRegShadowsNew)
4197 {
4198 /*
4199 * Partial.
4200 */
4201 do
4202 {
4203 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4204 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4205 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4206 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4207 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4208#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4209 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4210#endif
4211
4212 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4213 fGstRegs &= ~fInThisHstReg;
4214 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4215 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4216 if (!fGstRegShadowsNew)
4217 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4218 } while (fGstRegs != 0);
4219 }
4220 else
4221 {
4222 /*
4223 * Clear all.
4224 */
4225 do
4226 {
4227 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4228 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4229 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4230 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4231 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4232#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4233 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4234#endif
4235
4236 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4237 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4238 } while (fGstRegs != 0);
4239 pReNative->Core.bmHstRegsWithGstShadow = 0;
4240 }
4241 }
4242}
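
/*
 * Illustrative usage sketch (editorial addition, disabled): dropping stale
 * shadow copies after a call that may have modified the guest GPRs.  This is
 * bookkeeping only; no code is emitted.  The wrapper name is made up.
 */
#if 0
static void iemNativeExampleFlushAfterHelper(PIEMRECOMPILERSTATE pReNative)
{
    uint64_t fGstGprs = 0;
    for (unsigned iGstReg = kIemNativeGstReg_GprFirst; iGstReg <= kIemNativeGstReg_GprLast; iGstReg++)
        fGstGprs |= RT_BIT_64(iGstReg);
    iemNativeRegFlushGuestShadows(pReNative, fGstGprs);
}
#endif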
4243
4244
4245/**
4246 * Flushes guest register shadow copies held by a set of host registers.
4247 *
4248 * This is used with the TLB lookup code for ensuring that we don't carry on
4249 * with any guest shadows in volatile registers, as these will get corrupted by
4250 * a TLB miss.
4251 *
4252 * @param pReNative The native recompile state.
4253 * @param fHstRegs Set of host registers to flush guest shadows for.
4254 */
4255DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4256{
4257 /*
4258 * Reduce the mask by what's currently shadowed.
4259 */
4260 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4261 fHstRegs &= bmHstRegsWithGstShadowOld;
4262 if (fHstRegs)
4263 {
4264 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4265 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4266 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4267 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4268 if (bmHstRegsWithGstShadowNew)
4269 {
4270 /*
4271 * Partial (likely).
4272 */
4273 uint64_t fGstShadows = 0;
4274 do
4275 {
4276 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4277 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4278 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4279 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4280#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4281 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4282#endif
4283
4284 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4285 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4286 fHstRegs &= ~RT_BIT_32(idxHstReg);
4287 } while (fHstRegs != 0);
4288 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4289 }
4290 else
4291 {
4292 /*
4293 * Clear all.
4294 */
4295 do
4296 {
4297 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4298 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4299 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4300 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4301#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4302 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4303#endif
4304
4305 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4306 fHstRegs &= ~RT_BIT_32(idxHstReg);
4307 } while (fHstRegs != 0);
4308 pReNative->Core.bmGstRegShadows = 0;
4309 }
4310 }
4311}
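
/* Illustrative usage sketch (not from the original sources): before emitting a
   TLB lookup that may branch off to a helper, the caller would drop all guest
   shadows living in call-volatile GPRs so a miss cannot leave stale
   associations behind, roughly like this:

        iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
        // ... emit the TLB lookup code, which may call a helper on a miss ...
        off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);

   The real call sites live in the TLB lookup emitters and may differ in detail. */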
4312
4313
4314/**
4315 * Restores guest shadow copies in volatile registers.
4316 *
4317 * This is used after calling a helper function (think TLB miss) to restore the
4318 * register state of volatile registers.
4319 *
4320 * @param pReNative The native recompile state.
4321 * @param off The code buffer offset.
4322 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4323 * be active (allocated) w/o asserting. Hack.
4324 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4325 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4326 */
4327DECL_HIDDEN_THROW(uint32_t)
4328iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4329{
4330 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4331 if (fHstRegs)
4332 {
4333 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4334 do
4335 {
4336 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4337
4338            /* It's not fatal if a register is active holding a variable that
4339               shadows a guest register, ASSUMING all pending guest register
4340               writes were flushed prior to the helper call. However, we'll be
4341               emitting duplicate restores, so it wastes code space. */
4342 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4343 RT_NOREF(fHstRegsActiveShadows);
4344
4345 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4346#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4347 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4348#endif
4349 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4350 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4351 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4352
4353 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4354 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4355
4356 fHstRegs &= ~RT_BIT_32(idxHstReg);
4357 } while (fHstRegs != 0);
4358 }
4359 return off;
4360}
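
/* Illustrative sequencing sketch (not from the original sources; the argument
   lists of the two Var helpers referenced in the @see above are elided since
   they are not shown in this file): around a helper call on a TLB-miss path,
   variable and shadow state is typically bracketed like this:

        off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, ...);
        // ... emit the actual helper call ...
        off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, ...);
        off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
*/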
4361
4362
4363
4364
4365/*********************************************************************************************************************************
4366* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4367*********************************************************************************************************************************/
4368#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4369
4370/**
4371 * Info about shadowed guest SIMD register values.
4372 * @see IEMNATIVEGSTSIMDREG
4373 */
4374static struct
4375{
4376 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4377 uint32_t offXmm;
4378 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4379 uint32_t offYmm;
4380 /** Name (for logging). */
4381 const char *pszName;
4382} const g_aGstSimdShadowInfo[] =
4383{
4384#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4385 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4386 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4387 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4388 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4389 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4390 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4391 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4392 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4393 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4394 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4395 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4396 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4397 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4398 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4399 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4400 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4401 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4402#undef CPUMCTX_OFF_AND_SIZE
4403};
4404AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
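
/* Example of how the table above is consumed (not from the original sources):
   the write-back code further down looks up the CPUMCTX offsets by guest SIMD
   register index, e.g.:

        off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
                                                        g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
*/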
4405
4406
4407/**
4408 * Frees a temporary SIMD register.
4409 *
4410 * Any shadow copies of guest registers assigned to the host register will not
4411 * be flushed by this operation.
4412 */
4413DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4414{
4415 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4416 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4417 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4418 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4419 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4420}
4421
4422
4423/**
4424 * Emits code to flush a pending write of the given guest SIMD register, if any, and clears its dirty state afterwards.
4425 *
4426 * @returns New code buffer offset.
4427 * @param pReNative The native recompile state.
4428 * @param off Current code buffer position.
4429 * @param enmGstSimdReg The guest SIMD register to flush.
4430 */
4431DECL_HIDDEN_THROW(uint32_t)
4432iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4433{
4434 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4435
4436 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4437 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4438 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4439 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4440
4441 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4442 {
4443 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4444 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4445 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4446 }
4447
4448 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4449 {
4450 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4451 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4452 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4453 }
4454
4455 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4456 return off;
4457}
4458
4459
4460/**
4461 * Flush the given set of guest SIMD registers if marked as dirty.
4462 *
4463 * @returns New code buffer offset.
4464 * @param pReNative The native recompile state.
4465 * @param off Current code buffer position.
4466 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4467 */
4468DECL_HIDDEN_THROW(uint32_t)
4469iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4470{
4471 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4472 & fFlushGstSimdReg;
4473 if (bmGstSimdRegShadowDirty)
4474 {
4475# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4476 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4477 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4478# endif
4479
4480 do
4481 {
4482 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4483 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4484 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4485 } while (bmGstSimdRegShadowDirty);
4486 }
4487
4488 return off;
4489}
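
/* Illustrative usage sketch (not from the original sources; idxGstSimdReg is a
   placeholder): flushing everything relies on the UINT64_MAX default, while
   keeping a single register pending masks it out of the set:

        off = iemNativeSimdRegFlushDirtyGuest(pReNative, off);                      // flush all dirty shadows
        off = iemNativeSimdRegFlushDirtyGuest(pReNative, off,
                                              ~RT_BIT_64(idxGstSimdReg));           // flush all but one
*/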
4490
4491
4492#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4493/**
4494 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4495 *
4496 * @returns New code buffer offset.
4497 * @param pReNative The native recompile state.
4498 * @param off Current code buffer position.
4499 * @param idxHstSimdReg The host SIMD register.
4500 *
4501 * @note This doesn't do any unshadowing of guest registers from the host register.
4502 */
4503DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4504{
4505 /* We need to flush any pending guest register writes this host register shadows. */
4506 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4507 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4508 if (bmGstSimdRegShadowDirty)
4509 {
4510# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4511 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4512 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4513# endif
4514
4515 do
4516 {
4517 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4518 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4519 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4520 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4521 } while (bmGstSimdRegShadowDirty);
4522 }
4523
4524 return off;
4525}
4526#endif
4527
4528
4529/**
4530 * Locate a register, possibly freeing one up.
4531 *
4532 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4533 * failed.
4534 *
4535 * @returns Host register number on success.  Returns UINT8_MAX if no registers
4536 *          are found; the caller is supposed to deal with this and raise an
4537 *          allocation type specific status code (if desired).
4538 *
4539 * @throws  VBox status code if we run into trouble spilling a variable or
4540 *          recording debug info.  Does NOT throw anything if we're out of
4541 *          registers, though.
4542 */
4543static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4544 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4545{
4546 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4547 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4548 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4549
4550 /*
4551 * Try a freed register that's shadowing a guest register.
4552 */
4553 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4554 if (fRegs)
4555 {
4556 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4557
4558#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4559 /*
4560         * When we have liveness information, we use it to kick out all shadowed
4561         * guest registers that will not be needed any more in this TB.  If we're
4562 * lucky, this may prevent us from ending up here again.
4563 *
4564 * Note! We must consider the previous entry here so we don't free
4565 * anything that the current threaded function requires (current
4566 * entry is produced by the next threaded function).
4567 */
4568 uint32_t const idxCurCall = pReNative->idxCurCall;
4569 if (idxCurCall > 0)
4570 {
4571 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4572
4573# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4574 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4575 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4576 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4577#else
4578 /* Construct a mask of the registers not in the read or write state.
4579               Note! We could skip writes, if they aren't from us, as this is just
4580 a hack to prevent trashing registers that have just been written
4581 or will be written when we retire the current instruction. */
4582 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4583 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4584 & IEMLIVENESSBIT_MASK;
4585#endif
4586 /* If it matches any shadowed registers. */
4587 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4588 {
4589 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4590 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4591 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4592
4593 /* See if we've got any unshadowed registers we can return now. */
4594 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4595 if (fUnshadowedRegs)
4596 {
4597 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4598 return (fPreferVolatile
4599 ? ASMBitFirstSetU32(fUnshadowedRegs)
4600 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4601 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4602 - 1;
4603 }
4604 }
4605 }
4606#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4607
4608 unsigned const idxReg = (fPreferVolatile
4609 ? ASMBitFirstSetU32(fRegs)
4610 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4611 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4612 - 1;
4613
4614 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4615 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4616 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4617 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4618
4619 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4620 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4621
4622 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4623 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4624 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4625 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4626 return idxReg;
4627 }
4628
4629 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4630
4631 /*
4632 * Try free up a variable that's in a register.
4633 *
4634     * We do two rounds here, first evacuating variables that don't need to be
4635     * saved on the stack, then in the second round moving things to the stack.
4636 */
4637 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4638 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4639 {
4640 uint32_t fVars = pReNative->Core.bmVars;
4641 while (fVars)
4642 {
4643 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4644 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4645            if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
4646            {
                    fVars &= ~RT_BIT_32(idxVar); /* Clear the bit before continuing, otherwise we'd loop forever. */
                    continue;
                }
4647
4648 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4649 && (RT_BIT_32(idxReg) & fRegMask)
4650 && ( iLoop == 0
4651 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4652 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4653 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4654 {
4655 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4656 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4657 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4658 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4659 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4660 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4661
4662 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4663 {
4664 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4665 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4666 }
4667
4668 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4669 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4670
4671 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4672                pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4673 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4674 return idxReg;
4675 }
4676 fVars &= ~RT_BIT_32(idxVar);
4677 }
4678 }
4679
4680 AssertFailed();
4681 return UINT8_MAX;
4682}
4683
4684
4685/**
4686 * Flushes a set of guest SIMD register shadow copies.
4687 *
4688 * This is usually done after calling a threaded function or a C-implementation
4689 * of an instruction.
4690 *
4691 * @param pReNative The native recompile state.
4692 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4693 */
4694DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4695{
4696 /*
4697 * Reduce the mask by what's currently shadowed
4698 */
4699 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4700 fGstSimdRegs &= bmGstSimdRegShadows;
4701 if (fGstSimdRegs)
4702 {
4703 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4704 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4705 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4706 if (bmGstSimdRegShadowsNew)
4707 {
4708 /*
4709 * Partial.
4710 */
4711 do
4712 {
4713 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4714 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4715 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4716 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4717 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4718 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4719
4720 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4721 fGstSimdRegs &= ~fInThisHstReg;
4722 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4723 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4724 if (!fGstRegShadowsNew)
4725 {
4726 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4727 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4728 }
4729 } while (fGstSimdRegs != 0);
4730 }
4731 else
4732 {
4733 /*
4734 * Clear all.
4735 */
4736 do
4737 {
4738 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4739 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4740 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4741 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4742 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4743 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4744
4745 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4746 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4747 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4748 } while (fGstSimdRegs != 0);
4749 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
4750 }
4751 }
4752}
4753
4754
4755/**
4756 * Allocates a temporary host SIMD register.
4757 *
4758 * This may emit code to save register content onto the stack in order to free
4759 * up a register.
4760 *
4761 * @returns The host register number; throws VBox status code on failure,
4762 * so no need to check the return value.
4763 * @param pReNative The native recompile state.
4764 * @param poff Pointer to the variable with the code buffer position.
4765 *                      This will be updated if we need to move a variable from
4766 * register to stack in order to satisfy the request.
4767 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4768 * registers (@c true, default) or the other way around
4769 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4770 */
4771DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4772{
4773 /*
4774 * Try find a completely unused register, preferably a call-volatile one.
4775 */
4776 uint8_t idxSimdReg;
4777    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4778                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4779 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
4780 if (fRegs)
4781 {
4782 if (fPreferVolatile)
4783 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4784 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4785 else
4786 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4787 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4788 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4789 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4790
4791 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4792 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4793 }
4794 else
4795 {
4796 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
4797 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4798 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4799 }
4800
4801 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4802 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4803}
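
/* Illustrative usage sketch (not from the original sources): the usual pattern
   is to allocate a scratch SIMD register, emit code using it and release it
   again without touching any guest shadowing:

        uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off);
        // ... emit instructions using idxTmpSimdReg ...
        iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);
*/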
4804
4805
4806/**
4807 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of
4808 * acceptable registers.
4809 *
4810 * @returns The host register number; throws VBox status code on failure,
4811 * so no need to check the return value.
4812 * @param pReNative The native recompile state.
4813 * @param poff Pointer to the variable with the code buffer position.
4814 *                      This will be updated if we need to move a variable from
4815 * register to stack in order to satisfy the request.
4816 * @param fRegMask Mask of acceptable registers.
4817 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4818 * registers (@c true, default) or the other way around
4819 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4820 */
4821DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4822 bool fPreferVolatile /*= true*/)
4823{
4824 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4825 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4826
4827 /*
4828 * Try find a completely unused register, preferably a call-volatile one.
4829 */
4830 uint8_t idxSimdReg;
4831 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4832 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4833 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
4834 & fRegMask;
4835 if (fRegs)
4836 {
4837 if (fPreferVolatile)
4838 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4839 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4840 else
4841 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4842 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4843 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4844 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4845
4846 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4847 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4848 }
4849 else
4850 {
4851 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4852 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4853 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4854 }
4855
4856 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4857 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4858}
4859
4860
4861/**
4862 * Sets the indicator for which part of the given SIMD register has valid data loaded.
4863 *
4864 * @param pReNative The native recompile state.
4865 * @param idxHstSimdReg The host SIMD register to update the state for.
4866 * @param enmLoadSz The load size to set.
4867 */
4868DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
4869 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
4870{
4871 /* Everything valid already? -> nothing to do. */
4872 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4873 return;
4874
4875 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
4876 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
4877 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
4878 {
4879 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
4880 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
4881 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
4882 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
4883 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
4884 }
4885}
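
/* Example of the merge semantics above (not from the original sources): starting
   from kIemNativeGstSimdRegLdStSz_Invalid, loading first the low and then the
   high 128-bit half leaves the register marked as fully loaded:

        iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
        iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
        // enmLoaded is now kIemNativeGstSimdRegLdStSz_256.
*/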
4886
4887
4888static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
4889 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
4890{
4891 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
4892 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
4893 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4894 {
4895# ifdef RT_ARCH_ARM64
4896 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
4897 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
4898# endif
4899
4900 if (idxHstSimdRegDst != idxHstSimdRegSrc)
4901 {
4902 switch (enmLoadSzDst)
4903 {
4904 case kIemNativeGstSimdRegLdStSz_256:
4905 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4906 break;
4907 case kIemNativeGstSimdRegLdStSz_Low128:
4908 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4909 break;
4910 case kIemNativeGstSimdRegLdStSz_High128:
4911 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4912 break;
4913 default:
4914 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4915 }
4916
4917 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
4918 }
4919 }
4920 else
4921 {
4922        /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
4923 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
4924 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
4925 }
4926
4927 return off;
4928}
4929
4930
4931/**
4932 * Allocates a temporary host SIMD register for keeping a guest
4933 * SIMD register value.
4934 *
4935 * Since we may already have a register holding the guest register value,
4936 * code will be emitted to do the loading if that's not the case. Code may also
4937 * be emitted if we have to free up a register to satisfy the request.
4938 *
4939 * @returns The host register number; throws VBox status code on failure, so no
4940 * need to check the return value.
4941 * @param pReNative The native recompile state.
4942 * @param poff Pointer to the variable with the code buffer
4943 *                          position. This will be updated if we need to move a
4944 * variable from register to stack in order to satisfy
4945 * the request.
4946 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
 * @param   enmLoadSz       Which part of the register must hold valid data
 *                          (low 128 bits, high 128 bits or the full 256 bits).
4947 * @param enmIntendedUse How the caller will be using the host register.
4948 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4949 * register is okay (default). The ASSUMPTION here is
4950 * that the caller has already flushed all volatile
4951 * registers, so this is only applied if we allocate a
4952 * new register.
4953 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4954 */
4955DECL_HIDDEN_THROW(uint8_t)
4956iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
4957 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4958 bool fNoVolatileRegs /*= false*/)
4959{
4960 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
4961#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
4962 AssertMsg( pReNative->idxCurCall == 0
4963 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4964 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4965 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4966 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4967 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
4968 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
4969#endif
4970#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4971 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4972#endif
4973 uint32_t const fRegMask = !fNoVolatileRegs
4974 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
4975 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4976
4977 /*
4978 * First check if the guest register value is already in a host register.
4979 */
4980 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
4981 {
4982 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4983 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4984 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
4985 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
4986
4987 /* It's not supposed to be allocated... */
4988 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
4989 {
4990 /*
4991 * If the register will trash the guest shadow copy, try find a
4992 * completely unused register we can use instead. If that fails,
4993 * we need to disassociate the host reg from the guest reg.
4994 */
4995 /** @todo would be nice to know if preserving the register is in any way helpful. */
4996 /* If the purpose is calculations, try duplicate the register value as
4997 we'll be clobbering the shadow. */
4998 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4999 && ( ~pReNative->Core.bmHstSimdRegs
5000 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5001 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5002 {
5003 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5004
5005 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5006
5007 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5008 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5009 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5010 idxSimdReg = idxRegNew;
5011 }
5012 /* If the current register matches the restrictions, go ahead and allocate
5013 it for the caller. */
5014 else if (fRegMask & RT_BIT_32(idxSimdReg))
5015 {
5016 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5017 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5018 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5019 {
5020 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5021 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5022 else
5023 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5024 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5025 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5026 }
5027 else
5028 {
5029 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5030 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5031 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5032 }
5033 }
5034 /* Otherwise, allocate a register that satisfies the caller and transfer
5035 the shadowing if compatible with the intended use. (This basically
5036           means the caller wants a non-volatile register (RSP push/pop scenario).) */
5037 else
5038 {
5039 Assert(fNoVolatileRegs);
5040 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5041 !fNoVolatileRegs
5042 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5043 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5044 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5045 {
5046 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5047                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5048 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5049 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5050 }
5051 else
5052 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5053 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5054 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5055 idxSimdReg = idxRegNew;
5056 }
5057 }
5058 else
5059 {
5060 /*
5061 * Oops. Shadowed guest register already allocated!
5062 *
5063 * Allocate a new register, copy the value and, if updating, the
5064 * guest shadow copy assignment to the new register.
5065 */
5066 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5067 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5068 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5069 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5070
5071 /** @todo share register for readonly access. */
5072 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5073 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5074
5075 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5076 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5077 else
5078 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5079
5080 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5081 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5082 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5083 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5084 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5085 else
5086 {
5087 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5088 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5089 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5090 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5091 }
5092 idxSimdReg = idxRegNew;
5093 }
5094 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5095
5096#ifdef VBOX_STRICT
5097 /* Strict builds: Check that the value is correct. */
5098 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5099 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5100#endif
5101
5102 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5103 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5104 {
5105# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5106 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5107 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5108# endif
5109
5110 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5111 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5112 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5113 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5114 else
5115 {
5116 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5117 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5118 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5119 }
5120 }
5121
5122 return idxSimdReg;
5123 }
5124
5125 /*
5126     * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5127 */
5128 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5129
5130 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5131 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5132 else
5133 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5134
5135 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5136 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5137
5138 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5139 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5140 {
5141# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5142 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5143 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5144# endif
5145
5146 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5147 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5148 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5149 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5150 else
5151 {
5152 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5153 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5154 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5155 }
5156 }
5157
5158    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5159 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5160
5161 return idxRegNew;
5162}
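
/* Illustrative usage sketch (not from the original sources; treating
   IEMNATIVEGSTSIMDREG_SIMD(0) as guest ymm0 is an assumption): a typical
   read-modify-write of the low half of a guest SIMD register looks like this:

        uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
                                                                           IEMNATIVEGSTSIMDREG_SIMD(0),
                                                                           kIemNativeGstSimdRegLdStSz_Low128,
                                                                           kIemNativeGstRegUse_ForUpdate);
        // ... emit code modifying the low 128 bits of idxSimdReg ...
        iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);

   The ForUpdate use marks the accessed half as dirty, so the write-back to
   CPUMCTX is left to the flushing helpers above. */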
5163
5164
5165/**
5166 * Flushes guest SIMD register shadow copies held by a set of host registers.
5167 *
5168 * This is used when calling an external helper to ensure that we don't carry on
5169 * with any guest shadows in volatile registers, as these will get corrupted by the call.
5170 *
5171 * @param pReNative The native recompile state.
5172 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5173 */
5174DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5175{
5176 /*
5177 * Reduce the mask by what's currently shadowed.
5178 */
5179 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5180 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5181 if (fHstSimdRegs)
5182 {
5183 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5184 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5185 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5186 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5187 if (bmHstSimdRegsWithGstShadowNew)
5188 {
5189 /*
5190 * Partial (likely).
5191 */
5192 uint64_t fGstShadows = 0;
5193 do
5194 {
5195 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5196 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5197 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5198 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5199 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5200 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5201
5202 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5203 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5204 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5205 } while (fHstSimdRegs != 0);
5206 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5207 }
5208 else
5209 {
5210 /*
5211 * Clear all.
5212 */
5213 do
5214 {
5215 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5216 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5217 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5218 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5219 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5220 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5221
5222 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5223 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5224 } while (fHstSimdRegs != 0);
5225 pReNative->Core.bmGstSimdRegShadows = 0;
5226 }
5227 }
5228}
5229#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5230
5231
5232
5233/*********************************************************************************************************************************
5234* Code emitters for flushing pending guest register writes and sanity checks *
5235*********************************************************************************************************************************/
5236
5237#ifdef VBOX_STRICT
5238/**
5239 * Does internal register allocator sanity checks.
5240 */
5241DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5242{
5243 /*
5244 * Iterate host registers building a guest shadowing set.
5245 */
5246 uint64_t bmGstRegShadows = 0;
5247 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5248 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5249 while (bmHstRegsWithGstShadow)
5250 {
5251 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5252 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5253 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5254
5255 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5256 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5257 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5258 bmGstRegShadows |= fThisGstRegShadows;
5259 while (fThisGstRegShadows)
5260 {
5261 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5262 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5263 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5264 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5265 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5266 }
5267 }
5268 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5269 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5270 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5271
5272 /*
5273 * Now the other way around, checking the guest to host index array.
5274 */
5275 bmHstRegsWithGstShadow = 0;
5276 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5277 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5278 while (bmGstRegShadows)
5279 {
5280 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5281 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5282 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5283
5284 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5285 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5286 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5287 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5288 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5289 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5290 }
5291 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5292 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5293 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5294}
5295#endif /* VBOX_STRICT */
5296
5297
5298/**
5299 * Flushes any delayed guest register writes.
5300 *
5301 * This must be called prior to calling CImpl functions and any helpers that use
5302 * the guest state (like raising exceptions) and such.
5303 *
5304 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5305 * the caller if it wishes to do so.
5306 */
5307DECL_HIDDEN_THROW(uint32_t)
5308iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5309{
5310#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5311    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5312 off = iemNativeEmitPcWriteback(pReNative, off);
5313#else
5314 RT_NOREF(pReNative, fGstShwExcept);
5315#endif
5316
5317#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5318 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5319#endif
5320
5321#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5322 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5323#endif
5324
5325 return off;
5326}
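
/* Illustrative sequencing sketch (not from the original sources): before
   emitting a call to a CImpl function or another helper that inspects the guest
   state, any delayed writes are pushed out first, e.g. with no exceptions:

        off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0 /*fGstShwExcept*/, 0 /*fGstSimdShwExcept*/);
        // ... emit the helper call ...

   Callers normally go through a cheaper wrapper that only drops into this slow
   path when something is actually pending; that wrapper is outside this excerpt. */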
5327
5328
5329#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5330/**
5331 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5332 */
5333DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5334{
5335 Assert(pReNative->Core.offPc);
5336# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5337 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5338 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5339# endif
5340
5341# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5342 /* Allocate a temporary PC register. */
5343 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5344
5345 /* Perform the addition and store the result. */
5346 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5347 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5348
5349 /* Free but don't flush the PC register. */
5350 iemNativeRegFreeTmp(pReNative, idxPcReg);
5351# else
5352 /* Compare the shadow with the context value, they should match. */
5353 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5354 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5355# endif
5356
5357 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5358 pReNative->Core.offPc = 0;
5359 pReNative->Core.cInstrPcUpdateSkipped = 0;
5360
5361 return off;
5362}
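
/* Illustrative note (not from the original sources): with delayed PC updating
   the recompiler accumulates instruction lengths in pReNative->Core.offPc
   instead of emitting a RIP store per instruction; the writeback above then
   materializes the sum in one go, conceptually:

        // cpum.GstCtx.rip += pReNative->Core.offPc;  offPc = 0;  cInstrPcUpdateSkipped = 0;
*/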
5363#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5364
5365
5366/*********************************************************************************************************************************
5367* Code Emitters (larger snippets) *
5368*********************************************************************************************************************************/
5369
5370/**
5371 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5372 * extending to 64-bit width.
5373 *
5374 * @returns New code buffer offset on success, UINT32_MAX on failure.
5375 * @param   pReNative   The native recompile state.
5376 * @param off The current code buffer position.
5377 * @param idxHstReg The host register to load the guest register value into.
5378 * @param enmGstReg The guest register to load.
5379 *
5380 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5381 * that is something the caller needs to do if applicable.
5382 */
5383DECL_HIDDEN_THROW(uint32_t)
5384iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5385{
5386 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5387 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5388
5389 switch (g_aGstShadowInfo[enmGstReg].cb)
5390 {
5391 case sizeof(uint64_t):
5392 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5393 case sizeof(uint32_t):
5394 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5395 case sizeof(uint16_t):
5396 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5397#if 0 /* not present in the table. */
5398 case sizeof(uint8_t):
5399 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5400#endif
5401 default:
5402 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5403 }
5404}
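
/* Illustrative usage sketch (not from the original sources): this loader is what
   the restore and strict-check paths use, e.g. to pull the guest PC into the
   fixed temporary register:

        off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
*/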
5405
5406
5407#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5408/**
5409 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5410 *
5411 * @returns New code buffer offset on success, UINT32_MAX on failure.
5412 * @param pReNative The recompiler state.
5413 * @param off The current code buffer position.
5414 * @param idxHstSimdReg The host register to load the guest register value into.
5415 * @param enmGstSimdReg The guest register to load.
5416 * @param enmLoadSz The load size of the register.
5417 *
5418 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5419 *       that is something the caller needs to do if applicable.
5420 */
5421DECL_HIDDEN_THROW(uint32_t)
5422iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5423 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5424{
5425 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5426
5427 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5428 switch (enmLoadSz)
5429 {
5430 case kIemNativeGstSimdRegLdStSz_256:
5431 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5432 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5433 case kIemNativeGstSimdRegLdStSz_Low128:
5434 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5435 case kIemNativeGstSimdRegLdStSz_High128:
5436 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5437 default:
5438 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5439 }
5440}
5441#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5442
5443#ifdef VBOX_STRICT
5444
5445/**
5446 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5447 *
5448 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5449 * Trashes EFLAGS on AMD64.
5450 */
5451DECL_HIDDEN_THROW(uint32_t)
5452iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5453{
5454# ifdef RT_ARCH_AMD64
5455 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5456
5457 /* rol reg64, 32 */
5458 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5459 pbCodeBuf[off++] = 0xc1;
5460 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5461 pbCodeBuf[off++] = 32;
5462
5463 /* test reg32, ffffffffh */
5464 if (idxReg >= 8)
5465 pbCodeBuf[off++] = X86_OP_REX_B;
5466 pbCodeBuf[off++] = 0xf7;
5467 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5468 pbCodeBuf[off++] = 0xff;
5469 pbCodeBuf[off++] = 0xff;
5470 pbCodeBuf[off++] = 0xff;
5471 pbCodeBuf[off++] = 0xff;
5472
5473 /* je/jz +1 */
5474 pbCodeBuf[off++] = 0x74;
5475 pbCodeBuf[off++] = 0x01;
5476
5477 /* int3 */
5478 pbCodeBuf[off++] = 0xcc;
5479
5480 /* rol reg64, 32 */
5481 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5482 pbCodeBuf[off++] = 0xc1;
5483 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5484 pbCodeBuf[off++] = 32;
5485
5486# elif defined(RT_ARCH_ARM64)
5487 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5488 /* lsr tmp0, reg64, #32 */
5489 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5490 /* cbz tmp0, +1 */
5491 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5492 /* brk #0x1100 */
5493 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5494
5495# else
5496# error "Port me!"
5497# endif
5498 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5499 return off;
5500}
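/* Illustrative sketch (not part of the build): conceptually the strict check
   emitted above is equivalent to the following C, where uReg is the 64-bit host
   register value and trap() stands in for the emitted int3/brk:
        if ((uReg >> 32) != 0)
            trap();
   The AMD64 variant avoids a scratch register by rotating the value into the low
   half, testing it, and rotating it back. */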
5501
5502
5503/**
5504 * Emits code that checks that the content of register @a idxReg is the same
5505 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5506 * instruction if that's not the case.
5507 *
5508 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5509 * Trashes EFLAGS on AMD64.
5510 */
5511DECL_HIDDEN_THROW(uint32_t)
5512iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5513{
5514#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5515 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5516 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5517 return off;
5518#endif
5519
5520# ifdef RT_ARCH_AMD64
5521 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5522
5523 /* cmp reg, [mem] */
5524 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5525 {
5526 if (idxReg >= 8)
5527 pbCodeBuf[off++] = X86_OP_REX_R;
5528 pbCodeBuf[off++] = 0x38;
5529 }
5530 else
5531 {
5532 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5533 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5534 else
5535 {
5536 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5537 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5538 else
5539 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5540 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5541 if (idxReg >= 8)
5542 pbCodeBuf[off++] = X86_OP_REX_R;
5543 }
5544 pbCodeBuf[off++] = 0x39;
5545 }
5546 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5547
5548 /* je/jz +1 */
5549 pbCodeBuf[off++] = 0x74;
5550 pbCodeBuf[off++] = 0x01;
5551
5552 /* int3 */
5553 pbCodeBuf[off++] = 0xcc;
5554
5555 /* For values smaller than the register size, we must check that the rest
5556 of the register is all zeros. */
5557 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5558 {
5559 /* test reg64, imm32 */
5560 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5561 pbCodeBuf[off++] = 0xf7;
5562 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5563 pbCodeBuf[off++] = 0;
5564 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5565 pbCodeBuf[off++] = 0xff;
5566 pbCodeBuf[off++] = 0xff;
5567
5568 /* je/jz +1 */
5569 pbCodeBuf[off++] = 0x74;
5570 pbCodeBuf[off++] = 0x01;
5571
5572 /* int3 */
5573 pbCodeBuf[off++] = 0xcc;
5574 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5575 }
5576 else
5577 {
5578 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5579 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5580 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5581 }
5582
5583# elif defined(RT_ARCH_ARM64)
5584 /* mov TMP0, [gstreg] */
5585 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5586
5587 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5588 /* sub tmp0, tmp0, idxReg */
5589 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5590 /* cbz tmp0, +1 */
5591 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5592 /* brk #0x1000+enmGstReg */
5593 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5595
5596# else
5597# error "Port me!"
5598# endif
5599 return off;
5600}
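/* Illustrative sketch (not part of the build): the check above corresponds to
        if (uHostReg != <guest register value at pVCpu + g_aGstShadowInfo[enmGstReg].off>)
            trap();
   with an additional check that the unused upper bits of the host register are
   zero when the guest register is narrower than 64 bits. */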
5601
5602
5603# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5604# ifdef RT_ARCH_AMD64
5605/**
5606 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
5607 */
5608DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5609{
5610 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5611 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5612 if (idxSimdReg >= 8)
5613 pbCodeBuf[off++] = X86_OP_REX_R;
5614 pbCodeBuf[off++] = 0x0f;
5615 pbCodeBuf[off++] = 0x38;
5616 pbCodeBuf[off++] = 0x29;
5617 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5618
5619 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5620 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5621 pbCodeBuf[off++] = X86_OP_REX_W
5622 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5623 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5624 pbCodeBuf[off++] = 0x0f;
5625 pbCodeBuf[off++] = 0x3a;
5626 pbCodeBuf[off++] = 0x16;
5627 pbCodeBuf[off++] = 0xeb;
5628 pbCodeBuf[off++] = 0x00;
5629
5630 /* cmp tmp0, 0xffffffffffffffff. */
5631 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5632 pbCodeBuf[off++] = 0x83;
5633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5634 pbCodeBuf[off++] = 0xff;
5635
5636 /* je/jz +1 */
5637 pbCodeBuf[off++] = 0x74;
5638 pbCodeBuf[off++] = 0x01;
5639
5640 /* int3 */
5641 pbCodeBuf[off++] = 0xcc;
5642
5643 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5644 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5645 pbCodeBuf[off++] = X86_OP_REX_W
5646 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5647 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5648 pbCodeBuf[off++] = 0x0f;
5649 pbCodeBuf[off++] = 0x3a;
5650 pbCodeBuf[off++] = 0x16;
5651 pbCodeBuf[off++] = 0xeb;
5652 pbCodeBuf[off++] = 0x01;
5653
5654 /* cmp tmp0, 0xffffffffffffffff. */
5655 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5656 pbCodeBuf[off++] = 0x83;
5657 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5658 pbCodeBuf[off++] = 0xff;
5659
5660 /* je/jz +1 */
5661 pbCodeBuf[off++] = 0x74;
5662 pbCodeBuf[off++] = 0x01;
5663
5664 /* int3 */
5665 pbCodeBuf[off++] = 0xcc;
5666
5667 return off;
5668}
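/* Illustrative sketch (not part of the build), expressed with SSE4.1 intrinsics
   instead of hand-assembled bytes; vHost holds the host SIMD register copy and
   pu128Gst points at the 128-bit guest value in the vCPU structure:
        __m128i const vEq = _mm_cmpeq_epi64(vHost, _mm_load_si128(pu128Gst)); // pcmpeqq
        if (   _mm_extract_epi64(vEq, 0) != -1                                // pextrq #0 + cmp
            || _mm_extract_epi64(vEq, 1) != -1)                               // pextrq #1 + cmp
            trap();                                                           // int3
*/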
5669# endif
5670
5671
5672/**
5673 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5674 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5675 * instruction if that's not the case.
5676 *
5677 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5678 * Trashes EFLAGS on AMD64.
5679 */
5680DECL_HIDDEN_THROW(uint32_t)
5681iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5682 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5683{
5684 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5685 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5686 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5687 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5688 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5689 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5690 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5691 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5692 return off;
5693
5694# ifdef RT_ARCH_AMD64
5695 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5696 {
5697 /* movdqa vectmp0, idxSimdReg */
5698 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5699
5700 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5701
5702 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5703 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5704 }
5705
5706 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5707 {
5708 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5709 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5710
5711 /* vextracti128 vectmp0, idxSimdReg, 1 */
5712 pbCodeBuf[off++] = X86_OP_VEX3;
5713 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5714 | X86_OP_VEX3_BYTE1_X
5715 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5716 | 0x03; /* Opcode map */
5717 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5718 pbCodeBuf[off++] = 0x39;
5719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5720 pbCodeBuf[off++] = 0x01;
5721
5722 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5723 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5724 }
5725# elif defined(RT_ARCH_ARM64)
5726 /* mov vectmp0, [gstreg] */
5727 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5728
5729 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5730 {
5731 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5732 /* eor vectmp0, vectmp0, idxSimdReg */
5733 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5734 /* uaddlv vectmp0, vectmp0.16B */
5735 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
5736 /* umov tmp0, vectmp0.H[0] */
5737 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5738 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5739 /* cbz tmp0, +1 */
5740 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5741 /* brk #0x1000+enmGstReg */
5742 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5743 }
5744
5745 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5746 {
5747 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5748 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
5749 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
5750 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
5751 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
5752 /* umov tmp0, (vectmp0 + 1).H[0] */
5753 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
5754 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5755 /* cbz tmp0, +1 */
5756 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5757 /* brk #0x1000+enmGstReg */
5758 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5759 }
5760
5761# else
5762# error "Port me!"
5763# endif
5764
5765 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5766 return off;
5767}
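/* Illustrative sketch (not part of the build): the ARM64 comparison above XORs the
   two vectors and sums all byte lanes; a non-zero sum means the registers differ.
   In NEON intrinsics this is roughly:
        uint8x16_t const vDiff = veorq_u8(vHost, vGuest);   // eor
        if (vaddlvq_u8(vDiff) != 0)                         // uaddlv + umov
            trap();                                         // brk #0x1000+reg
*/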
5768# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5769
5770
5771/**
5772 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5773 * important bits.
5774 *
5775 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5776 * Trashes EFLAGS on AMD64.
5777 */
5778DECL_HIDDEN_THROW(uint32_t)
5779iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5780{
5781 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5782 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5783 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5784 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5785
5786#ifdef RT_ARCH_AMD64
5787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5788
5789 /* je/jz +1 */
5790 pbCodeBuf[off++] = 0x74;
5791 pbCodeBuf[off++] = 0x01;
5792
5793 /* int3 */
5794 pbCodeBuf[off++] = 0xcc;
5795
5796# elif defined(RT_ARCH_ARM64)
5797 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5798
5799 /* b.eq +1 */
5800 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5801 /* brk #0x2000 */
5802 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5803
5804# else
5805# error "Port me!"
5806# endif
5807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5808
5809 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5810 return off;
5811}
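/* Illustrative sketch (not part of the build): the emitted strict check corresponds to
        if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
            != (fExec & IEMTB_F_KEY_MASK))
            trap();                     // int3 / brk #0x2000
*/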
5812
5813#endif /* VBOX_STRICT */
5814
5815
5816#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5817/**
5818 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
5819 */
5820DECL_HIDDEN_THROW(uint32_t)
5821iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
5822{
5823 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
5824
5825 fEflNeeded &= X86_EFL_STATUS_BITS;
5826 if (fEflNeeded)
5827 {
5828# ifdef RT_ARCH_AMD64
5829 /* test dword [pVCpu + offVCpu], imm32 */
5830 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5831 if (fEflNeeded <= 0xff)
5832 {
5833 pCodeBuf[off++] = 0xf6;
5834 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5835 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5836 }
5837 else
5838 {
5839 pCodeBuf[off++] = 0xf7;
5840 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5841 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5842 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
5843 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
5844 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
5845 }
5846 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5847
5848# else
5849 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5850 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
5851 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
5852# ifdef RT_ARCH_ARM64
5853 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
5854 off = iemNativeEmitBrk(pReNative, off, 0x7777);
5855# else
5856# error "Port me!"
5857# endif
5858 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5859# endif
5860 }
5861 return off;
5862}
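/* Illustrative sketch (not part of the build): conceptually the strict check tests
        pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS
   and traps (brk #0x7777 on ARM64) when any status flag the current code relies
   on has been marked as skipped. */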
5863#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
5864
5865
5866/**
5867 * Emits code for checking the return code of a call and rcPassUp, returning
5868 * from the code if either is non-zero.
5869 */
5870DECL_HIDDEN_THROW(uint32_t)
5871iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5872{
5873#ifdef RT_ARCH_AMD64
5874 /*
5875 * AMD64: eax = call status code.
5876 */
5877
5878 /* edx = rcPassUp */
5879 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5880# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5881 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5882# endif
5883
5884 /* edx = eax | rcPassUp */
5885 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5886 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5887 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5888 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5889
5890 /* Jump to non-zero status return path. */
5891 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5892
5893 /* done. */
5894
5895#elif RT_ARCH_ARM64
5896 /*
5897 * ARM64: w0 = call status code.
5898 */
5899# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5900 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5901# endif
5902 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5903
5904 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5905
5906 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5907
5908 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5909 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5910 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5911
5912#else
5913# error "port me"
5914#endif
5915 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5916 RT_NOREF_PV(idxInstr);
5917 return off;
5918}
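/* Illustrative sketch (not part of the build): the emitted code corresponds to
        if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
            goto NonZeroRetOrPassUp;    // rc fiddling + TB exit
   where rcCall is the status code just returned in eax/w0. */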
5919
5920
5921/**
5922 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5923 * raising a \#GP(0) if it isn't.
5924 *
5925 * @returns New code buffer offset; throws VBox status code on error.
5926 * @param pReNative The native recompile state.
5927 * @param off The code buffer offset.
5928 * @param idxAddrReg The host register with the address to check.
5929 * @param idxInstr The current instruction.
5930 */
5931DECL_HIDDEN_THROW(uint32_t)
5932iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5933{
5934 /*
5935 * Make sure we don't have any outstanding guest register writes as we may
5936 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5937 */
5938 off = iemNativeRegFlushPendingWrites(pReNative, off);
5939
5940#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5941 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5942#else
5943 RT_NOREF(idxInstr);
5944#endif
5945
5946#ifdef RT_ARCH_AMD64
5947 /*
5948 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5949 * return raisexcpt();
5950 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5951 */
5952 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5953
5954 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5955 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5956 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5957 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5958 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5959
5960 iemNativeRegFreeTmp(pReNative, iTmpReg);
5961
5962#elif defined(RT_ARCH_ARM64)
5963 /*
5964 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5965 * return raisexcpt();
5966 * ----
5967 * mov x1, 0x800000000000
5968 * add x1, x0, x1
5969 * cmp xzr, x1, lsr 48
5970 * b.ne .Lraisexcpt
5971 */
5972 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5973
5974 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5975 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5976 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5977 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5978
5979 iemNativeRegFreeTmp(pReNative, iTmpReg);
5980
5981#else
5982# error "Port me"
5983#endif
5984 return off;
5985}
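/* Illustrative sketch (not part of the build): the AMD64 trick above is the
   following C, assuming 48-bit canonical addresses:
        static bool isCanonicalSketch(uint64_t uAddr)
        {
            return ((((uint32_t)(uAddr >> 32)) + UINT32_C(0x8000)) >> 16) == 0;
        }
   Adding 0x8000 to the high dword wraps both canonical ranges into 0x0000..0xffff,
   so a single shift by 16 suffices and no 64-bit immediate is needed. */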
5986
5987
5988/**
5989 * Emits code to check that the content of @a idxAddrReg is within the limit
5990 * of CS, raising a \#GP(0) if it isn't.
5991 *
5992 * @returns New code buffer offset; throws VBox status code on error.
5993 * @param pReNative The native recompile state.
5994 * @param off The code buffer offset.
5995 * @param idxAddrReg The host register (32-bit) with the address to
5996 * check.
5997 * @param idxInstr The current instruction.
5998 */
5999DECL_HIDDEN_THROW(uint32_t)
6000iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6001 uint8_t idxAddrReg, uint8_t idxInstr)
6002{
6003 /*
6004 * Make sure we don't have any outstanding guest register writes as we may
6005 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6006 */
6007 off = iemNativeRegFlushPendingWrites(pReNative, off);
6008
6009#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6010 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6011#else
6012 RT_NOREF(idxInstr);
6013#endif
6014
6015 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6016 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6017 kIemNativeGstRegUse_ReadOnly);
6018
6019 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6020 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6021
6022 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6023 return off;
6024}
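/* Illustrative sketch (not part of the build): with uAddr32 being the value in
   idxAddrReg and cbCsLimit the CS limit loaded from CPUMCTX, the emitted code is
        if (uAddr32 > cbCsLimit)       // unsigned compare + ja
            raise #GP(0);
*/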
6025
6026
6027/**
6028 * Emits a call to a CImpl function or something similar.
6029 */
6030DECL_HIDDEN_THROW(uint32_t)
6031iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6032 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6033{
6034 /* Writeback everything. */
6035 off = iemNativeRegFlushPendingWrites(pReNative, off);
6036
6037 /*
6038 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6039 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6040 */
6041 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6042 fGstShwFlush
6043 | RT_BIT_64(kIemNativeGstReg_Pc)
6044 | RT_BIT_64(kIemNativeGstReg_EFlags));
6045 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6046
6047 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6048
6049 /*
6050 * Load the parameters.
6051 */
6052#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6053 /* Special-case the hidden VBOXSTRICTRC return pointer. */
6054 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6055 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6056 if (cAddParams > 0)
6057 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6058 if (cAddParams > 1)
6059 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6060 if (cAddParams > 2)
6061 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6062 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6063
6064#else
6065 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6066 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6067 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6068 if (cAddParams > 0)
6069 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6070 if (cAddParams > 1)
6071 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6072 if (cAddParams > 2)
6073# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6074 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6075# else
6076 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6077# endif
6078#endif
6079
6080 /*
6081 * Make the call.
6082 */
6083 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6084
6085#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6086 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6087#endif
6088
6089 /*
6090 * Check the status code.
6091 */
6092 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6093}
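/* Illustrative sketch (not part of the build): ignoring the Windows/VBOXSTRICTRC
   hidden-return quirk, the call emitted above has the shape
        rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);
   with only the first cAddParams additional parameters actually loaded. */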
6094
6095
6096/**
6097 * Emits a call to a threaded worker function.
6098 */
6099DECL_HIDDEN_THROW(uint32_t)
6100iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6101{
6102 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6103
6104 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6105 off = iemNativeRegFlushPendingWrites(pReNative, off);
6106
6107 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6108 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6109
6110#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6111 /* The threaded function may throw / longjmp, so set the current instruction
6112 number if we're counting. */
6113 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6114#endif
6115
6116 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6117
6118#ifdef RT_ARCH_AMD64
6119 /* Load the parameters and emit the call. */
6120# ifdef RT_OS_WINDOWS
6121# ifndef VBOXSTRICTRC_STRICT_ENABLED
6122 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6123 if (cParams > 0)
6124 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6125 if (cParams > 1)
6126 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6127 if (cParams > 2)
6128 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6129# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6130 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6131 if (cParams > 0)
6132 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6133 if (cParams > 1)
6134 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6135 if (cParams > 2)
6136 {
6137 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6138 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6139 }
6140 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6141# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6142# else
6143 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6144 if (cParams > 0)
6145 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6146 if (cParams > 1)
6147 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6148 if (cParams > 2)
6149 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6150# endif
6151
6152 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6153
6154# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6155 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6156# endif
6157
6158#elif RT_ARCH_ARM64
6159 /*
6160 * ARM64:
6161 */
6162 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6163 if (cParams > 0)
6164 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6165 if (cParams > 1)
6166 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6167 if (cParams > 2)
6168 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6169
6170 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6171
6172#else
6173# error "port me"
6174#endif
6175
6176 /*
6177 * Check the status code.
6178 */
6179 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6180
6181 return off;
6182}
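/* Illustrative sketch (not part of the build): modulo calling-convention details,
   the call emitted above amounts to
        rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                       pCallEntry->auParams[0], pCallEntry->auParams[1], pCallEntry->auParams[2]);
   with trailing parameters beyond cParams left unloaded. */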
6183
6184#ifdef VBOX_WITH_STATISTICS
6185
6186/**
6187 * Emits code to update the thread call statistics.
6188 */
6189DECL_INLINE_THROW(uint32_t)
6190iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6191{
6192 /*
6193 * Update threaded function stats.
6194 */
6195 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6196 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6197# if defined(RT_ARCH_ARM64)
6198 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6199 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6200 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6201 iemNativeRegFreeTmp(pReNative, idxTmp1);
6202 iemNativeRegFreeTmp(pReNative, idxTmp2);
6203# else
6204 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6205# endif
6206 return off;
6207}
6208
6209
6210/**
6211 * Emits code to update the TB exit reason statistics.
6212 */
6213DECL_INLINE_THROW(uint32_t)
6214iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6215{
6216 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6217 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6218 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6219 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6220 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6221
6222 return off;
6223}
6224
6225#endif /* VBOX_WITH_STATISTICS */
6226
6227/**
6228 * Emits the code at the ReturnWithFlags label (returns
6229 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6230 */
6231static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6232{
6233 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6234 if (idxLabel != UINT32_MAX)
6235 {
6236 iemNativeLabelDefine(pReNative, idxLabel, off);
6237
6238#ifdef VBOX_WITH_STATISTICS
6239 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitReturnWithFlags));
6240#endif
6241
6242 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6243
6244 /* jump back to the return sequence. */
6245 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6246 }
6247 return off;
6248}
6249
6250
6251/**
6252 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6253 */
6254static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6255{
6256 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6257 if (idxLabel != UINT32_MAX)
6258 {
6259 iemNativeLabelDefine(pReNative, idxLabel, off);
6260
6261#ifdef VBOX_WITH_STATISTICS
6262 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitReturnBreak));
6263#endif
6264
6265 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6266
6267 /* jump back to the return sequence. */
6268 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6269 }
6270 return off;
6271}
6272
6273
6274/**
6275 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6276 */
6277static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6278{
6279 /*
6280 * Generate the rc + rcPassUp fiddling code if needed.
6281 */
6282 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6283 if (idxLabel != UINT32_MAX)
6284 {
6285 iemNativeLabelDefine(pReNative, idxLabel, off);
6286
6287 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6288#ifdef RT_ARCH_AMD64
6289# ifdef RT_OS_WINDOWS
6290# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6291 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6292# endif
6293 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6294 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6295# else
6296 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6297 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6298# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6299 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6300# endif
6301# endif
6302# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6303 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6304# endif
6305
6306#else
6307 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6308 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6309 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6310#endif
6311
6312 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6313 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6314 }
6315 return off;
6316}
6317
6318
6319/**
6320 * Emits a standard epilog.
6321 */
6322static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6323{
6324 *pidxReturnLabel = UINT32_MAX;
6325
6326 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6327 off = iemNativeRegFlushPendingWrites(pReNative, off);
6328
6329 /*
6330 * Successful return, so clear the return register (eax, w0).
6331 */
6332 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6333 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6334
6335 /*
6336 * Define label for common return point.
6337 */
6338 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6339 *pidxReturnLabel = idxReturn;
6340
6341 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6342
6343 /*
6344 * Restore registers and return.
6345 */
6346#ifdef RT_ARCH_AMD64
6347 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6348
6349 /* Reposition rsp at the r15 restore point. */
6350 pbCodeBuf[off++] = X86_OP_REX_W;
6351 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6352 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6353 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6354
6355 /* Pop non-volatile registers and return */
6356 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6357 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6358 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6359 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6360 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6361 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6362 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6363 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6364# ifdef RT_OS_WINDOWS
6365 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6366 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6367# endif
6368 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6369 pbCodeBuf[off++] = 0xc9; /* leave */
6370 pbCodeBuf[off++] = 0xc3; /* ret */
6371 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6372
6373#elif RT_ARCH_ARM64
6374 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6375
6376 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6377 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6378 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6379 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6380 IEMNATIVE_FRAME_VAR_SIZE / 8);
6381 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6382 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6383 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6384 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6385 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6386 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6387 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6388 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6389 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6390 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6391 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6392 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6393
6394 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6395 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6396 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6397 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6398
6399 /* retab / ret */
6400# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6401 if (1)
6402 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6403 else
6404# endif
6405 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6406
6407#else
6408# error "port me"
6409#endif
6410 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6411
6412 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6413 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6414
6415 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6416}
6417
6418
6419#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6420/**
6421 * Emits a standard prolog.
6422 */
6423static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6424{
6425#ifdef RT_ARCH_AMD64
6426 /*
6427 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6428 * reserving 64 bytes for stack variables plus 4 non-register argument
6429 * slots. Fixed register assignment: xBX = pVCpu.
6430 *
6431 * Since we always do the same register spilling, we can use the same
6432 * unwind description for all the code.
6433 */
6434 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6435 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6436 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6437 pbCodeBuf[off++] = 0x8b;
6438 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6439 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6440 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6441# ifdef RT_OS_WINDOWS
6442 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6443 pbCodeBuf[off++] = 0x8b;
6444 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6445 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6446 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6447# else
6448 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6449 pbCodeBuf[off++] = 0x8b;
6450 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6451# endif
6452 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6453 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6454 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6455 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6456 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6457 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6458 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6459 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6460
6461# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6462 /* Save the frame pointer. */
6463 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6464# endif
6465
6466 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6467 X86_GREG_xSP,
6468 IEMNATIVE_FRAME_ALIGN_SIZE
6469 + IEMNATIVE_FRAME_VAR_SIZE
6470 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6471 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6472 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6473 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6474 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6475
6476#elif RT_ARCH_ARM64
6477 /*
6478 * We set up a stack frame exactly like on x86, only we have to save the
6479 * return address ourselves here. We save all non-volatile registers.
6480 */
6481 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6482
6483 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further, as we've been unable
6484 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6485 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6486 * in any way conditional, so just emit this instruction now and hope for the best... */
6487 /* pacibsp */
6488 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6489# endif
6490
6491 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6492 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6493 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6494 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6495 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6496 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6497 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6498 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6499 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6500 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6501 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6502 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6503 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6504 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6505 /* Save the BP and LR (ret address) registers at the top of the frame. */
6506 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6507 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6508 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6509 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6510 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6511 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6512
6513 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6514 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6515
6516 /* mov r28, r0 */
6517 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6518 /* mov r27, r1 */
6519 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6520
6521# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6522 /* Save the frame pointer. */
6523 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6524 ARMV8_A64_REG_X2);
6525# endif
6526
6527#else
6528# error "port me"
6529#endif
6530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6531 return off;
6532}
6533#endif
6534
6535
6536/*********************************************************************************************************************************
6537* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6538*********************************************************************************************************************************/
6539
6540/**
6541 * Internal work that allocates a variable with kind set to
6542 * kIemNativeVarKind_Invalid and no current stack allocation.
6543 *
6544 * The kind will either be set by the caller or later when the variable is first
6545 * assigned a value.
6546 *
6547 * @returns Unpacked index.
6548 * @internal
6549 */
6550static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6551{
6552 Assert(cbType > 0 && cbType <= 64);
6553 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6554 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6555 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6556 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6557 pReNative->Core.aVars[idxVar].cbVar = cbType;
6558 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6559 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6560 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6561 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6562 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6563 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6564 pReNative->Core.aVars[idxVar].u.uValue = 0;
6565#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6566 pReNative->Core.aVars[idxVar].fSimdReg = false;
6567#endif
6568 return idxVar;
6569}
6570
6571
6572/**
6573 * Internal work that allocates an argument variable w/o setting enmKind.
6574 *
6575 * @returns Unpacked index.
6576 * @internal
6577 */
6578static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6579{
6580 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6581 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6582 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6583
6584 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6585 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6586 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6587 return idxVar;
6588}
6589
6590
6591/**
6592 * Gets the stack slot for a stack variable, allocating one if necessary.
6593 *
6594 * Calling this function implies that the stack slot will contain a valid
6595 * variable value. The caller deals with any register currently assigned to the
6596 * variable, typically by spilling it into the stack slot.
6597 *
6598 * @returns The stack slot number.
6599 * @param pReNative The recompiler state.
6600 * @param idxVar The variable.
6601 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6602 */
6603DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6604{
6605 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6606 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6607 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6608
6609 /* Already got a slot? */
6610 uint8_t const idxStackSlot = pVar->idxStackSlot;
6611 if (idxStackSlot != UINT8_MAX)
6612 {
6613 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6614 return idxStackSlot;
6615 }
6616
6617 /*
6618 * A single slot is easy to allocate.
6619 * Allocate them from the top end, closest to BP, to reduce the displacement.
6620 */
6621 if (pVar->cbVar <= sizeof(uint64_t))
6622 {
6623 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6624 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6625 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6626 pVar->idxStackSlot = (uint8_t)iSlot;
6627 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6628 return (uint8_t)iSlot;
6629 }
6630
6631 /*
6632 * We need more than one stack slot.
6633 *
6634 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6635 */
6636 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6637 Assert(pVar->cbVar <= 64);
6638 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6639 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6640 uint32_t bmStack = pReNative->Core.bmStack;
6641 while (bmStack != UINT32_MAX)
6642 {
6643 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6644 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6645 iSlot = (iSlot - 1) & ~fBitAlignMask;
6646 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6647 {
6648 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6649 pVar->idxStackSlot = (uint8_t)iSlot;
6650 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6651 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6652 return (uint8_t)iSlot;
6653 }
6654
6655 bmStack |= (fBitAllocMask << iSlot);
6656 }
6657 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6658}
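/* Worked example (illustrative): for a 32 byte variable the loop above uses
   fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3, forcing the slot
   index to a multiple of 4, and fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf,
   reserving four consecutive 8-byte slots in Core.bmStack. */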
6659
6660
6661/**
6662 * Changes the variable to a stack variable.
6663 *
6664 * Currently this is only possible to do the first time the variable is used;
6665 * switching later can be implemented but hasn't been done.
6666 *
6667 * @param pReNative The recompiler state.
6668 * @param idxVar The variable.
6669 * @throws VERR_IEM_VAR_IPE_2
6670 */
6671DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6672{
6673 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6674 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6675 if (pVar->enmKind != kIemNativeVarKind_Stack)
6676 {
6677 /* We could in theory transition from immediate to stack as well, but it
6678 would involve the caller doing work storing the value on the stack. So,
6679 till that's required we only allow transition from invalid. */
6680 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6681 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6682 pVar->enmKind = kIemNativeVarKind_Stack;
6683
6684 /* Note! We don't allocate a stack slot here, that's only done when a
6685 slot is actually needed to hold a variable value. */
6686 }
6687}
6688
6689
6690/**
6691 * Sets the variable to a constant (immediate) value.
6692 *
6693 * This does not require stack storage as we know the value and can always
6694 * reload it, unless of course it's referenced.
6695 *
6696 * @param pReNative The recompiler state.
6697 * @param idxVar The variable.
6698 * @param uValue The immediate value.
6699 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6700 */
6701DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6702{
6703 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6704 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6705 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6706 {
6707 /* Only simple transitions for now. */
6708 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6709 pVar->enmKind = kIemNativeVarKind_Immediate;
6710 }
6711 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6712
6713 pVar->u.uValue = uValue;
6714 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
6715 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
6716 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
6717}
6718
6719
6720/**
6721 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6722 *
6723 * This does not require stack storage as we know the value and can always
6724 * reload it. Loading is postponed till needed.
6725 *
6726 * @param pReNative The recompiler state.
6727 * @param idxVar The variable. Unpacked.
6728 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
6729 *
6730 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6731 * @internal
6732 */
6733static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6734{
6735 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6736 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6737
6738 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6739 {
6740 /* Only simple transitions for now. */
6741 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6742 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6743 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6744 }
6745 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6746
6747 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
6748
6749 /* Update the other variable, ensure it's a stack variable. */
6750 /** @todo handle variables with const values... that'll go boom now. */
6751 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6752 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
6753}
6754
6755
6756/**
6757 * Sets the variable to a reference (pointer) to a guest register reference.
6758 *
6759 * This does not require stack storage as we know the value and can always
6760 * reload it. Loading is postponed till needed.
6761 *
6762 * @param pReNative The recompiler state.
6763 * @param idxVar The variable.
6764 * @param enmRegClass The class guest registers to reference.
6765 * @param idxReg The register within @a enmRegClass to reference.
6766 *
6767 * @throws VERR_IEM_VAR_IPE_2
6768 */
6769DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6770 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6771{
6772 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6773 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6774
6775 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
6776 {
6777 /* Only simple transitions for now. */
6778 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6779 pVar->enmKind = kIemNativeVarKind_GstRegRef;
6780 }
6781 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6782
6783 pVar->u.GstRegRef.enmClass = enmRegClass;
6784 pVar->u.GstRegRef.idx = idxReg;
6785}
6786
6787
6788DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6789{
6790 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6791}
6792
6793
6794DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6795{
6796 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6797
6798 /* Since we're using a generic uint64_t value type, we must truncate it if
6799 the variable is smaller, otherwise we may end up with too large a value when
6800 scaling up an imm8 w/ sign-extension.
6801
6802 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6803 in the bios, bx=1) when running on arm, because clang expects 16-bit
6804 register parameters to have bits 16 and up set to zero. Instead of
6805 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6806 CF value in the result. */
6807 switch (cbType)
6808 {
6809 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6810 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6811 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6812 }
6813 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6814 return idxVar;
6815}
6816
6817
6818DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6819{
6820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
6821 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
6822 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6823 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6824 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6825 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6826
6827 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6828 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
6829 return idxArgVar;
6830}
6831
6832
6833DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6834{
6835 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6836 /* Don't set to stack now, leave that to the first use as for instance
6837 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6838 return idxVar;
6839}
6840
6841
6842DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6843{
6844 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6845
6846 /* Since we're using a generic uint64_t value type, we must truncate it if
6847 the variable is smaller, otherwise we may end up with a too large value when
6848 scaling up an imm8 w/ sign-extension. */
6849 switch (cbType)
6850 {
6851 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6852 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6853 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6854 }
6855 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6856 return idxVar;
6857}
6858
6859
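/**
 * Allocates a new stack variable of @a cbType size and emits code to copy the
 * value of @a idxVarOther into it, truncated to the new variable's size.
 *
 * @returns The packed index of the new variable.
 */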
6860DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
6861{
6862 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6863 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6864
6865 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
6866 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
6867
6868 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
6869
6870 /* Truncate the value to this variable's size. */
6871 switch (cbType)
6872 {
6873 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
6874 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
6875 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
6876 }
6877
6878 iemNativeVarRegisterRelease(pReNative, idxVarOther);
6879 iemNativeVarRegisterRelease(pReNative, idxVar);
6880 return idxVar;
6881}
6882
6883
6884/**
6885 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6886 * fixed till we call iemNativeVarRegisterRelease.
6887 *
6888 * @returns The host register number.
6889 * @param pReNative The recompiler state.
6890 * @param idxVar The variable.
6891 * @param poff Pointer to the instruction buffer offset.
6892 * In case a register needs to be freed up or the value
6893 * loaded off the stack.
6894 * @param fInitialized Set if the variable must already have been initialized.
6895 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6896 * the case.
6897 * @param idxRegPref Preferred register number or UINT8_MAX.
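*
* @note Rough usage sketch (illustrative only): acquire a host register for the
* variable, emit the code that operates on it, then release it again:
* uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
* ... emit instructions using idxReg ...
* iemNativeVarRegisterRelease(pReNative, idxVar);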
6898 */
6899DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6900 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6901{
6902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6904 Assert(pVar->cbVar <= 8);
6905 Assert(!pVar->fRegAcquired);
6906
6907 uint8_t idxReg = pVar->idxReg;
6908 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6909 {
6910 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6911 && pVar->enmKind < kIemNativeVarKind_End);
6912 pVar->fRegAcquired = true;
6913 return idxReg;
6914 }
6915
6916 /*
6917 * If the kind of variable has not yet been set, default to 'stack'.
6918 */
6919 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6920 && pVar->enmKind < kIemNativeVarKind_End);
6921 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6922 iemNativeVarSetKindToStack(pReNative, idxVar);
6923
6924 /*
6925 * We have to allocate a register for the variable, even if it's a stack one,
6926 * as we don't know if there are modifications being made to it before it's
6927 * finalized (todo: analyze and insert hints about that?).
6928 *
6929 * If we can, we try to get the correct register for argument variables. This
6930 * assumes that most argument variables are fetched as close as possible
6931 * to the actual call, so that there aren't any interfering hidden calls
6932 * (memory accesses, etc) in between.
6933 *
6934 * If we cannot, or it's a plain variable, we make sure no argument registers
6935 * that will be used by this MC block will be allocated here, and we always
6936 * prefer non-volatile registers to avoid needing to spill stuff for an
6937 * internal call.
6938 */
6939 /** @todo Detect too early argument value fetches and warn in the python
6940 * script about hidden calls causing less optimal code to be generated. */
6941
6942 uint8_t const uArgNo = pVar->uArgNo;
6943 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6944 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6945 {
6946 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6947
6948#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6949 /* Writeback any dirty shadow registers we are about to unshadow. */
6950 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
6951#endif
6952
6953 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6954 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6955 }
6956 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6957 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6958 {
6959 /** @todo there must be a better way for this, and for cArgsX to boot? */
6960 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
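/* Candidate registers: currently free, not shadowing any guest register, not
fixed/reserved, and not among the argument registers this MC block will need. */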
6961 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6962 & ~pReNative->Core.bmHstRegsWithGstShadow
6963 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6964 & fNotArgsMask;
6965 if (fRegs)
6966 {
6967 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
6968 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6969 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6970 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6971 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6972 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6973 }
6974 else
6975 {
6976 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6977 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6978 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6979 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6980 }
6981 }
6982 else
6983 {
6984 idxReg = idxRegPref;
6985 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6986 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
6987 }
6988 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6989 pVar->idxReg = idxReg;
6990
6991#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6992 pVar->fSimdReg = false;
6993#endif
6994
6995 /*
6996 * Load it off the stack if we've got a stack slot.
6997 */
6998 uint8_t const idxStackSlot = pVar->idxStackSlot;
6999 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7000 {
7001 Assert(fInitialized);
7002 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7003 switch (pVar->cbVar)
7004 {
7005 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7006 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7007 case 3: AssertFailed(); RT_FALL_THRU();
7008 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7009 default: AssertFailed(); RT_FALL_THRU();
7010 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7011 }
7012 }
7013 else
7014 {
7015 Assert(idxStackSlot == UINT8_MAX);
7016 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7017 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7018 else
7019 {
7020 /*
7021 * Convert from immediate to stack/register. This is currently only
7022 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7023 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7024 */
7025 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7026 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7027 idxVar, idxReg, pVar->u.uValue));
7028 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7029 pVar->enmKind = kIemNativeVarKind_Stack;
7030 }
7031 }
7032
7033 pVar->fRegAcquired = true;
7034 return idxReg;
7035}
7036
7037
7038#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7039/**
7040 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7041 * fixed till we call iemNativeVarRegisterRelease.
7042 *
7043 * @returns The host register number.
7044 * @param pReNative The recompiler state.
7045 * @param idxVar The variable.
7046 * @param poff Pointer to the instruction buffer offset.
7047 * In case a register needs to be freed up or the value
7048 * loaded off the stack.
7049 * @param fInitialized Set if the variable must already have been initialized.
7050 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7051 * the case.
7052 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
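*
* @note SIMD variables are never argument variables at present, so unlike
* iemNativeVarRegisterAcquire there is no argument register handling here.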
7053 */
7054DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7055 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7056{
7057 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7058 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7059 Assert( pVar->cbVar == sizeof(RTUINT128U)
7060 || pVar->cbVar == sizeof(RTUINT256U));
7061 Assert(!pVar->fRegAcquired);
7062
7063 uint8_t idxReg = pVar->idxReg;
7064 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7065 {
7066 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7067 && pVar->enmKind < kIemNativeVarKind_End);
7068 pVar->fRegAcquired = true;
7069 return idxReg;
7070 }
7071
7072 /*
7073 * If the kind of variable has not yet been set, default to 'stack'.
7074 */
7075 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7076 && pVar->enmKind < kIemNativeVarKind_End);
7077 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7078 iemNativeVarSetKindToStack(pReNative, idxVar);
7079
7080 /*
7081 * We have to allocate a register for the variable, even if it's a stack one,
7082 * as we don't know if there are modifications being made to it before it's
7083 * finalized (todo: analyze and insert hints about that?).
7084 *
7085 * If we can, we try to get the correct register for argument variables. This
7086 * assumes that most argument variables are fetched as close as possible
7087 * to the actual call, so that there aren't any interfering hidden calls
7088 * (memory accesses, etc) in between.
7089 *
7090 * If we cannot, or it's a plain variable, we make sure no argument registers
7091 * that will be used by this MC block will be allocated here, and we always
7092 * prefer non-volatile registers to avoid needing to spill stuff for an
7093 * internal call.
7094 */
7095 /** @todo Detect too early argument value fetches and warn in the python
7096 * script about hidden calls causing less optimal code to be generated. */
7097
7098 uint8_t const uArgNo = pVar->uArgNo;
7099 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7100
7101 /* SIMD is a bit simpler for now because there is no support for arguments. */
7102 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7103 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7104 {
7105 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7106 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7107 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7108 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7109 & fNotArgsMask;
7110 if (fRegs)
7111 {
7112 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7113 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7114 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7115 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7116 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7117 }
7118 else
7119 {
7120 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7121 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7122 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7123 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7124 }
7125 }
7126 else
7127 {
7128 idxReg = idxRegPref;
7129 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7130 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7131 }
7132 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7133
7134 pVar->fSimdReg = true;
7135 pVar->idxReg = idxReg;
7136
7137 /*
7138 * Load it off the stack if we've got a stack slot.
7139 */
7140 uint8_t const idxStackSlot = pVar->idxStackSlot;
7141 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7142 {
7143 Assert(fInitialized);
7144 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7145 switch (pVar->cbVar)
7146 {
7147 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7148 default: AssertFailed(); RT_FALL_THRU();
7149 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7150 }
7151 }
7152 else
7153 {
7154 Assert(idxStackSlot == UINT8_MAX);
7155 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7156 }
7157 pVar->fRegAcquired = true;
7158 return idxReg;
7159}
7160#endif
7161
7162
7163/**
7164 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7165 * guest register.
7166 *
7167 * This function makes sure there is a register for it and sets it to be the
7168 * current shadow copy of @a enmGstReg.
7169 *
7170 * @returns The host register number.
7171 * @param pReNative The recompiler state.
7172 * @param idxVar The variable.
7173 * @param enmGstReg The guest register this variable will be written to
7174 * after this call.
7175 * @param poff Pointer to the instruction buffer offset.
7176 * In case a register needs to be freed up or if the
7177 * variable content needs to be loaded off the stack.
7178 *
7179 * @note We DO NOT expect @a idxVar to be an argument variable,
7180 * because we can only be in the commit stage of an instruction when this
7181 * function is used.
7182 */
7183DECL_HIDDEN_THROW(uint8_t)
7184iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7185{
7186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7187 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7188 Assert(!pVar->fRegAcquired);
7189 AssertMsgStmt( pVar->cbVar <= 8
7190 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7191 || pVar->enmKind == kIemNativeVarKind_Stack),
7192 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7193 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7194 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7195
7196 /*
7197 * This shouldn't ever be used for arguments, unless it's in a weird else
7198 * branch that doesn't do any calling and even then it's questionable.
7199 *
7200 * However, in case someone writes crazy wrong MC code and does register
7201 * updates before making calls, just use the regular register allocator to
7202 * ensure we get a register suitable for the intended argument number.
7203 */
7204 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7205
7206 /*
7207 * If there is already a register for the variable, we transfer/set the
7208 * guest shadow copy assignment to it.
7209 */
7210 uint8_t idxReg = pVar->idxReg;
7211 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7212 {
7213#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7214 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7215 {
7216# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7217 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7218 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7219# endif
7220 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7221 }
7222#endif
7223
7224 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7225 {
7226 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7227 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7228 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7229 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7230 }
7231 else
7232 {
7233 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7234 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7235 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7236 }
7237 /** @todo figure this one out. We need some way of making sure the register isn't
7238 * modified after this point, just in case we start writing crappy MC code. */
7239 pVar->enmGstReg = enmGstReg;
7240 pVar->fRegAcquired = true;
7241 return idxReg;
7242 }
7243 Assert(pVar->uArgNo == UINT8_MAX);
7244
7245 /*
7246 * Because this is supposed to be the commit stage, we just tag along with the
7247 * temporary register allocator and upgrade it to a variable register.
7248 */
7249 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7250 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7251 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7252 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7253 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7254 pVar->idxReg = idxReg;
7255
7256 /*
7257 * Now we need to load the register value.
7258 */
7259 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7260 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7261 else
7262 {
7263 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7264 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7265 switch (pVar->cbVar)
7266 {
7267 case sizeof(uint64_t):
7268 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7269 break;
7270 case sizeof(uint32_t):
7271 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7272 break;
7273 case sizeof(uint16_t):
7274 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7275 break;
7276 case sizeof(uint8_t):
7277 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7278 break;
7279 default:
7280 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7281 }
7282 }
7283
7284 pVar->fRegAcquired = true;
7285 return idxReg;
7286}
7287
7288
7289/**
7290 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7291 *
7292 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7293 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7294 * requirement of flushing anything in volatile host registers when making a
7295 * call.
7296 *
7297 * @returns New @a off value.
7298 * @param pReNative The recompiler state.
7299 * @param off The code buffer position.
7300 * @param fHstRegsNotToSave Set of registers not to save & restore.
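*
* @note Illustrative call sequence (not taken from a specific caller): save the
* call volatile registers, emit the helper call, then restore them again:
* off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
* ... emit the helper call ...
* off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
* optionally followed by iemNativeRegRestoreGuestShadowsInVolatileRegs().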
7301 */
7302DECL_HIDDEN_THROW(uint32_t)
7303iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7304{
7305 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7306 if (fHstRegs)
7307 {
7308 do
7309 {
7310 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7311 fHstRegs &= ~RT_BIT_32(idxHstReg);
7312
7313 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7314 {
7315 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7316 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7317 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7318 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7319 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7320 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7321 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7322 {
7323 case kIemNativeVarKind_Stack:
7324 {
7325 /* Temporarily spill the variable register. */
7326 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7327 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7328 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7329 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7330 continue;
7331 }
7332
7333 case kIemNativeVarKind_Immediate:
7334 case kIemNativeVarKind_VarRef:
7335 case kIemNativeVarKind_GstRegRef:
7336 /* It is weird to have any of these loaded at this point. */
7337 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7338 continue;
7339
7340 case kIemNativeVarKind_End:
7341 case kIemNativeVarKind_Invalid:
7342 break;
7343 }
7344 AssertFailed();
7345 }
7346 else
7347 {
7348 /*
7349 * Allocate a temporary stack slot and spill the register to it.
7350 */
7351 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7352 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7353 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7354 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7355 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7356 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7357 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7358 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7359 }
7360 } while (fHstRegs);
7361 }
7362#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7363
7364 /*
7365 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7366 * which would be more difficult anyway due to them spanning multiple stack slots and coming in
7367 * different sizes (besides, we only have a limited number of slots at the moment).
7368 *
7369 * However, the shadows need to be flushed out as the guest SIMD register might get corrupted by
7370 * the callee. This asserts that the registers were written back earlier and are not in a dirty state.
7371 */
7372 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7373
7374 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7375 if (fHstRegs)
7376 {
7377 do
7378 {
7379 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7380 fHstRegs &= ~RT_BIT_32(idxHstReg);
7381
7382 /* Fixed reserved and temporary registers don't need saving. */
7383 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7384 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7385 continue;
7386
7387 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7388
7389 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7390 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7391 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7392 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7393 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7394 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7395 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7396 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7397 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7398 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7399 {
7400 case kIemNativeVarKind_Stack:
7401 {
7402 /* Temporarily spill the variable register. */
7403 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7404 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7405 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7406 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7407 if (cbVar == sizeof(RTUINT128U))
7408 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7409 else
7410 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7411 continue;
7412 }
7413
7414 case kIemNativeVarKind_Immediate:
7415 case kIemNativeVarKind_VarRef:
7416 case kIemNativeVarKind_GstRegRef:
7417 /* It is weird to have any of these loaded at this point. */
7418 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7419 continue;
7420
7421 case kIemNativeVarKind_End:
7422 case kIemNativeVarKind_Invalid:
7423 break;
7424 }
7425 AssertFailed();
7426 } while (fHstRegs);
7427 }
7428#endif
7429 return off;
7430}
7431
7432
7433/**
7434 * Emit code to restore volatile registers after a call to a helper.
7435 *
7436 * @returns New @a off value.
7437 * @param pReNative The recompiler state.
7438 * @param off The code buffer position.
7439 * @param fHstRegsNotToSave Set of registers not to save & restore.
7440 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7441 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7442 */
7443DECL_HIDDEN_THROW(uint32_t)
7444iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7445{
7446 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7447 if (fHstRegs)
7448 {
7449 do
7450 {
7451 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7452 fHstRegs &= ~RT_BIT_32(idxHstReg);
7453
7454 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7455 {
7456 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7457 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7458 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7459 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7460 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7461 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7462 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7463 {
7464 case kIemNativeVarKind_Stack:
7465 {
7466 /* Unspill the variable register. */
7467 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7468 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7469 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7470 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7471 continue;
7472 }
7473
7474 case kIemNativeVarKind_Immediate:
7475 case kIemNativeVarKind_VarRef:
7476 case kIemNativeVarKind_GstRegRef:
7477 /* It is weird to have any of these loaded at this point. */
7478 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7479 continue;
7480
7481 case kIemNativeVarKind_End:
7482 case kIemNativeVarKind_Invalid:
7483 break;
7484 }
7485 AssertFailed();
7486 }
7487 else
7488 {
7489 /*
7490 * Restore from temporary stack slot.
7491 */
7492 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7493 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7494 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7495 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7496
7497 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7498 }
7499 } while (fHstRegs);
7500 }
7501#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7502 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7503 if (fHstRegs)
7504 {
7505 do
7506 {
7507 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7508 fHstRegs &= ~RT_BIT_32(idxHstReg);
7509
7510 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7511 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7512 continue;
7513 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7514
7515 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7516 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7517 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7518 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7519 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7520 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7521 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7522 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7523 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7524 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7525 {
7526 case kIemNativeVarKind_Stack:
7527 {
7528 /* Unspill the variable register. */
7529 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7530 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7531 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7532 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7533
7534 if (cbVar == sizeof(RTUINT128U))
7535 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7536 else
7537 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7538 continue;
7539 }
7540
7541 case kIemNativeVarKind_Immediate:
7542 case kIemNativeVarKind_VarRef:
7543 case kIemNativeVarKind_GstRegRef:
7544 /* It is weird to have any of these loaded at this point. */
7545 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7546 continue;
7547
7548 case kIemNativeVarKind_End:
7549 case kIemNativeVarKind_Invalid:
7550 break;
7551 }
7552 AssertFailed();
7553 } while (fHstRegs);
7554 }
7555#endif
7556 return off;
7557}
7558
7559
7560/**
7561 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7562 *
7563 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7564 *
7565 * ASSUMES that @a idxVar is valid and unpacked.
7566 */
7567DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7568{
7569 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7570 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7571 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7572 {
7573 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
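/* A variable may span more than one 8-byte stack slot; compute the slot count
(rounding up) and the corresponding contiguous allocation mask. */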
7574 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7575 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7576 Assert(cSlots > 0);
7577 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7578 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7579 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7580 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7581 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7582 }
7583 else
7584 Assert(idxStackSlot == UINT8_MAX);
7585}
7586
7587
7588/**
7589 * Worker that frees a single variable.
7590 *
7591 * ASSUMES that @a idxVar is valid and unpacked.
7592 */
7593DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7594{
7595 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7596 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7597 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7598
7599 /* Free the host register first if any assigned. */
7600 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7601#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7602 if ( idxHstReg != UINT8_MAX
7603 && pReNative->Core.aVars[idxVar].fSimdReg)
7604 {
7605 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7606 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7607 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7608 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7609 }
7610 else
7611#endif
7612 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7613 {
7614 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7615 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7616 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7617 }
7618
7619 /* Free argument mapping. */
7620 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7621 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7622 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7623
7624 /* Free the stack slots. */
7625 iemNativeVarFreeStackSlots(pReNative, idxVar);
7626
7627 /* Free the actual variable. */
7628 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7629 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7630}
7631
7632
7633/**
7634 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7635 */
7636DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7637{
7638 while (bmVars != 0)
7639 {
7640 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7641 bmVars &= ~RT_BIT_32(idxVar);
7642
7643#if 1 /** @todo optimize by simplifying this later... */
7644 iemNativeVarFreeOneWorker(pReNative, idxVar);
7645#else
7646 /* Only need to free the host register, the rest is done as bulk updates below. */
7647 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7648 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7649 {
7650 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7651 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7652 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7653 }
7654#endif
7655 }
7656#if 0 /** @todo optimize by simplifying this later... */
7657 pReNative->Core.bmVars = 0;
7658 pReNative->Core.bmStack = 0;
7659 pReNative->Core.u64ArgVars = UINT64_MAX;
7660#endif
7661}
7662
7663
7664
7665/*********************************************************************************************************************************
7666* Emitters for IEM_MC_CALL_CIMPL_XXX *
7667*********************************************************************************************************************************/
7668
7669/**
7670 * Emits code to load a reference to the given guest register into @a idxGprDst.
7671 */
7672DECL_HIDDEN_THROW(uint32_t)
7673iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7674 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7675{
7676#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7677 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
7678#endif
7679
7680 /*
7681 * Get the offset relative to the CPUMCTX structure.
7682 */
7683 uint32_t offCpumCtx;
7684 switch (enmClass)
7685 {
7686 case kIemNativeGstRegRef_Gpr:
7687 Assert(idxRegInClass < 16);
7688 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7689 break;
7690
7691 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
7692 Assert(idxRegInClass < 4);
7693 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7694 break;
7695
7696 case kIemNativeGstRegRef_EFlags:
7697 Assert(idxRegInClass == 0);
7698 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7699 break;
7700
7701 case kIemNativeGstRegRef_MxCsr:
7702 Assert(idxRegInClass == 0);
7703 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7704 break;
7705
7706 case kIemNativeGstRegRef_FpuReg:
7707 Assert(idxRegInClass < 8);
7708 AssertFailed(); /** @todo what kind of indexing? */
7709 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7710 break;
7711
7712 case kIemNativeGstRegRef_MReg:
7713 Assert(idxRegInClass < 8);
7714 AssertFailed(); /** @todo what kind of indexing? */
7715 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7716 break;
7717
7718 case kIemNativeGstRegRef_XReg:
7719 Assert(idxRegInClass < 16);
7720 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7721 break;
7722
7723 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7724 Assert(idxRegInClass == 0);
7725 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7726 break;
7727
7728 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7729 Assert(idxRegInClass == 0);
7730 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7731 break;
7732
7733 default:
7734 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7735 }
7736
7737 /*
7738 * Load the address (the reference value) into the destination register.
7739 */
7740#ifdef RT_ARCH_AMD64
7741 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7742
7743#elif defined(RT_ARCH_ARM64)
7744 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7745 Assert(offCpumCtx < 4096);
7746 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7747
7748#else
7749# error "Port me!"
7750#endif
7751
7752 return off;
7753}
7754
7755
7756/**
7757 * Common code for CIMPL and AIMPL calls.
7758 *
7759 * These are calls that use argument variables and such. They should not be
7760 * confused with internal calls required to implement an MC operation,
7761 * like a TLB load and similar.
7762 *
7763 * Upon return all that is left to do is to load any hidden arguments and
7764 * perform the call. All argument variables are freed.
7765 *
7766 * @returns New code buffer offset; throws VBox status code on error.
7767 * @param pReNative The native recompile state.
7768 * @param off The code buffer offset.
7769 * @param cArgs The total number of arguments (includes hidden
7770 * count).
7771 * @param cHiddenArgs The number of hidden arguments. The hidden
7772 * arguments must not have any variable declared for
7773 * them, whereas all the regular arguments must
7774 * (tstIEMCheckMc ensures this).
7775 * @param fFlushPendingWrites Flag whether to flush pending writes (default true);
7776 * pending writes in call volatile registers are still flushed when false.
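*
* @note The overall sequence below: flush pending writes, spill any referenced
* variables to their stack slots, get each argument variable into its calling
* convention register (or stack slot), free the argument variables, and
* finally flush the remaining volatile registers; the caller then loads any
* hidden arguments and emits the actual call.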
7777 */
7778DECL_HIDDEN_THROW(uint32_t)
7779iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
7780 bool fFlushPendingWrites /*= true*/)
7781{
7782#ifdef VBOX_STRICT
7783 /*
7784 * Assert sanity.
7785 */
7786 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7787 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7788 for (unsigned i = 0; i < cHiddenArgs; i++)
7789 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7790 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7791 {
7792 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7793 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7794 }
7795 iemNativeRegAssertSanity(pReNative);
7796#endif
7797
7798 /* We don't know what the called function makes use of, so flush any pending register writes. */
7799 RT_NOREF(fFlushPendingWrites);
7800#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7801 if (fFlushPendingWrites)
7802#endif
7803 off = iemNativeRegFlushPendingWrites(pReNative, off);
7804
7805 /*
7806 * Before we do anything else, go over variables that are referenced and
7807 * make sure they are not in a register.
7808 */
7809 uint32_t bmVars = pReNative->Core.bmVars;
7810 if (bmVars)
7811 {
7812 do
7813 {
7814 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7815 bmVars &= ~RT_BIT_32(idxVar);
7816
7817 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7818 {
7819 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7820#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7821 if ( idxRegOld != UINT8_MAX
7822 && pReNative->Core.aVars[idxVar].fSimdReg)
7823 {
7824 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7825 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
7826
7827 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7828 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7829 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7830 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7831 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
7832 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7833 else
7834 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7835
7836 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
7837 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
7838
7839 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7840 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
7841 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7842 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
7843 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
7844 }
7845 else
7846#endif
7847 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7848 {
7849 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7850 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7851 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7852 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7853 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7854
7855 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7856 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7857 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7858 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7859 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7860 }
7861 }
7862 } while (bmVars != 0);
7863#if 0 //def VBOX_STRICT
7864 iemNativeRegAssertSanity(pReNative);
7865#endif
7866 }
7867
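/* Number of arguments passed in registers; any further arguments (where the ABI
supports that) are passed on the stack. */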
7868 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7869
7870#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7871 /*
7872 * As the very first step, go over the host registers that will be used for arguments
7873 * and make sure they don't shadow anything which needs writing back first.
7874 */
7875 for (uint32_t i = 0; i < cRegArgs; i++)
7876 {
7877 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7878
7879 /* Writeback any dirty guest shadows before using this register. */
7880 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
7881 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
7882 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
7883 }
7884#endif
7885
7886 /*
7887 * First, go over the host registers that will be used for arguments and make
7888 * sure they either hold the desired argument or are free.
7889 */
7890 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7891 {
7892 for (uint32_t i = 0; i < cRegArgs; i++)
7893 {
7894 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7895 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7896 {
7897 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7898 {
7899 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7900 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7901 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7902 Assert(pVar->idxReg == idxArgReg);
7903 uint8_t const uArgNo = pVar->uArgNo;
7904 if (uArgNo == i)
7905 { /* perfect */ }
7906 /* The variable allocator logic should make sure this is impossible,
7907 except for when the return register is used as a parameter (ARM,
7908 but not x86). */
7909#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7910 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7911 {
7912# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7913# error "Implement this"
7914# endif
7915 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7916 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7917 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7918 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7919 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7920 }
7921#endif
7922 else
7923 {
7924 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7925
7926 if (pVar->enmKind == kIemNativeVarKind_Stack)
7927 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7928 else
7929 {
7930 /* just free it, can be reloaded if used again */
7931 pVar->idxReg = UINT8_MAX;
7932 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7933 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7934 }
7935 }
7936 }
7937 else
7938 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7939 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7940 }
7941 }
7942#if 0 //def VBOX_STRICT
7943 iemNativeRegAssertSanity(pReNative);
7944#endif
7945 }
7946
7947 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7948
7949#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7950 /*
7951 * If there are any stack arguments, make sure they are in their place as well.
7952 *
7953 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we'll (or
7954 * the caller) be loading it later and it must be free (see first loop).
7955 */
7956 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7957 {
7958 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7959 {
7960 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7961 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7962 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7963 {
7964 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7965 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7966 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7967 pVar->idxReg = UINT8_MAX;
7968 }
7969 else
7970 {
7971 /* Use ARG0 as temp for stuff we need registers for. */
7972 switch (pVar->enmKind)
7973 {
7974 case kIemNativeVarKind_Stack:
7975 {
7976 uint8_t const idxStackSlot = pVar->idxStackSlot;
7977 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7978 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7979 iemNativeStackCalcBpDisp(idxStackSlot));
7980 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7981 continue;
7982 }
7983
7984 case kIemNativeVarKind_Immediate:
7985 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7986 continue;
7987
7988 case kIemNativeVarKind_VarRef:
7989 {
7990 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7991 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7992 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7993 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7994 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7995# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7996 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
7997 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
7998 if ( fSimdReg
7999 && idxRegOther != UINT8_MAX)
8000 {
8001 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8002 if (cbVar == sizeof(RTUINT128U))
8003 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8004 else
8005 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8006 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8007 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8008 }
8009 else
8010# endif
8011 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8012 {
8013 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8014 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8015 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8016 }
8017 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8018 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8019 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8020 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8021 continue;
8022 }
8023
8024 case kIemNativeVarKind_GstRegRef:
8025 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8026 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8027 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8028 continue;
8029
8030 case kIemNativeVarKind_Invalid:
8031 case kIemNativeVarKind_End:
8032 break;
8033 }
8034 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8035 }
8036 }
8037# if 0 //def VBOX_STRICT
8038 iemNativeRegAssertSanity(pReNative);
8039# endif
8040 }
8041#else
8042 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8043#endif
8044
8045 /*
8046 * Make sure the argument variables are loaded into their respective registers.
8047 *
8048 * We can optimize this by ASSUMING that any register allocations are for
8049 * registers that have already been loaded and are ready. The previous step
8050 * saw to that.
8051 */
8052 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8053 {
8054 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8055 {
8056 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8057 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8058 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8059 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8060 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8061 else
8062 {
8063 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8064 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8065 {
8066 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8067 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8068 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8069 | RT_BIT_32(idxArgReg);
8070 pVar->idxReg = idxArgReg;
8071 }
8072 else
8073 {
8074 /* Use ARG0 as temp for stuff we need registers for. */
8075 switch (pVar->enmKind)
8076 {
8077 case kIemNativeVarKind_Stack:
8078 {
8079 uint8_t const idxStackSlot = pVar->idxStackSlot;
8080 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8081 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8082 continue;
8083 }
8084
8085 case kIemNativeVarKind_Immediate:
8086 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8087 continue;
8088
8089 case kIemNativeVarKind_VarRef:
8090 {
8091 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8092 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8093 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8094 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8095 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8096 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8097#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8098 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8099 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8100 if ( fSimdReg
8101 && idxRegOther != UINT8_MAX)
8102 {
8103 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8104 if (cbVar == sizeof(RTUINT128U))
8105 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8106 else
8107 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8108 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8109 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8110 }
8111 else
8112#endif
8113 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8114 {
8115 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8116 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8117 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8118 }
8119 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8120 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8121 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8122 continue;
8123 }
8124
8125 case kIemNativeVarKind_GstRegRef:
8126 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8127 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8128 continue;
8129
8130 case kIemNativeVarKind_Invalid:
8131 case kIemNativeVarKind_End:
8132 break;
8133 }
8134 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8135 }
8136 }
8137 }
8138#if 0 //def VBOX_STRICT
8139 iemNativeRegAssertSanity(pReNative);
8140#endif
8141 }
8142#ifdef VBOX_STRICT
8143 else
8144 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8145 {
8146 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8147 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8148 }
8149#endif
8150
8151 /*
8152 * Free all argument variables (simplified).
8153 * Their lifetime always expires with the call they are for.
8154 */
8155 /** @todo Make the python script check that arguments aren't used after
8156 * IEM_MC_CALL_XXXX. */
8157 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8158 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8159 * an argument value. There is also some FPU stuff. */
8160 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8161 {
8162 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8163 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8164
8165 /* no need to free registers: */
8166 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8167 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8168 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8169 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8170 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8171 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8172
8173 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8174 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8175 iemNativeVarFreeStackSlots(pReNative, idxVar);
8176 }
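/* Note: aidxArgVars presumably overlays u64ArgVars, so with the argument
   slots just reset to UINT8_MAX (and the remaining slots never having been
   allocated) the whole union reads back as UINT64_MAX, which the assertion
   below relies on. */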
8177 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8178
8179 /*
8180 * Flush volatile registers as we make the call.
8181 */
8182 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8183
8184 return off;
8185}
8186
8187
8188
8189/*********************************************************************************************************************************
8190* TLB Lookup. *
8191*********************************************************************************************************************************/
8192
8193/**
8194 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8195 */
8196DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8197{
8198 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8199 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8200 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8201 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8202
8203 /* Do the lookup manually. */
8204 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8205 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8206 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8207 if (RT_LIKELY(pTlbe->uTag == uTag))
8208 {
8209 /*
8210 * Check TLB page table level access flags.
8211 */
8212 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
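/* The AssertCompile above is what makes the CPL trick below work: for CPL 3
   we get (3 + 1) & 4 = IEMTLBE_F_PT_NO_USER, so user-mode accesses check the
   no-user bit, while for CPL 0..2 the AND yields zero and it is ignored. */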
8213 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8214 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8215 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8216 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8217 | IEMTLBE_F_PG_UNASSIGNED
8218 | IEMTLBE_F_PT_NO_ACCESSED
8219 | fNoWriteNoDirty | fNoUser);
8220 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
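/* For the comparison below to hold, every masked 'no access' / unassigned
   bit must be clear and the physical revision stored in the entry must match
   the current uTlbPhysRev exactly. */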
8221 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8222 {
8223 /*
8224 * Return the address.
8225 */
8226 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8227 if ((uintptr_t)pbAddr == uResult)
8228 return;
8229 RT_NOREF(cbMem);
8230 AssertFailed();
8231 }
8232 else
8233 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8234 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8235 }
8236 else
8237 AssertFailed();
8238 RT_BREAKPOINT();
8239}
8240
8241/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
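/* A minimal sketch (not taken from the emitter code) of how a caller would
   pack the uSegAndSizeAndAccess argument consumed by iemNativeHlpCheckTlbLookup
   above, mirroring the RT_BYTE1 / RT_BYTE2 / >>16 decoding it performs:

       uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg
                                           | ((uint32_t)cbMem << 8)
                                           | (fAccess << 16);
*/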
8242
8243
8244
8245/*********************************************************************************************************************************
8246* Recompiler Core. *
8247*********************************************************************************************************************************/
8248
8249/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8250static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8251{
8252 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8253 pDis->cbCachedInstr += cbMaxRead;
8254 RT_NOREF(cbMinRead);
8255 return VERR_NO_DATA;
8256}
8257
8258
8259DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8260{
8261 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8262 {
8263#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8264 ENTRY(fLocalForcedActions),
8265 ENTRY(iem.s.rcPassUp),
8266 ENTRY(iem.s.fExec),
8267 ENTRY(iem.s.pbInstrBuf),
8268 ENTRY(iem.s.uInstrBufPc),
8269 ENTRY(iem.s.GCPhysInstrBuf),
8270 ENTRY(iem.s.cbInstrBufTotal),
8271 ENTRY(iem.s.idxTbCurInstr),
8272 ENTRY(iem.s.fSkippingEFlags),
8273#ifdef VBOX_WITH_STATISTICS
8274 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8275 ENTRY(iem.s.StatNativeTlbHitsForStore),
8276 ENTRY(iem.s.StatNativeTlbHitsForStack),
8277 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8278 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8279 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8280 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8281 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8282#endif
8283 ENTRY(iem.s.DataTlb.uTlbRevision),
8284 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8285 ENTRY(iem.s.DataTlb.cTlbHits),
8286 ENTRY(iem.s.DataTlb.aEntries),
8287 ENTRY(iem.s.CodeTlb.uTlbRevision),
8288 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8289 ENTRY(iem.s.CodeTlb.cTlbHits),
8290 ENTRY(iem.s.CodeTlb.aEntries),
8291 ENTRY(pVMR3),
8292 ENTRY(cpum.GstCtx.rax),
8293 ENTRY(cpum.GstCtx.ah),
8294 ENTRY(cpum.GstCtx.rcx),
8295 ENTRY(cpum.GstCtx.ch),
8296 ENTRY(cpum.GstCtx.rdx),
8297 ENTRY(cpum.GstCtx.dh),
8298 ENTRY(cpum.GstCtx.rbx),
8299 ENTRY(cpum.GstCtx.bh),
8300 ENTRY(cpum.GstCtx.rsp),
8301 ENTRY(cpum.GstCtx.rbp),
8302 ENTRY(cpum.GstCtx.rsi),
8303 ENTRY(cpum.GstCtx.rdi),
8304 ENTRY(cpum.GstCtx.r8),
8305 ENTRY(cpum.GstCtx.r9),
8306 ENTRY(cpum.GstCtx.r10),
8307 ENTRY(cpum.GstCtx.r11),
8308 ENTRY(cpum.GstCtx.r12),
8309 ENTRY(cpum.GstCtx.r13),
8310 ENTRY(cpum.GstCtx.r14),
8311 ENTRY(cpum.GstCtx.r15),
8312 ENTRY(cpum.GstCtx.es.Sel),
8313 ENTRY(cpum.GstCtx.es.u64Base),
8314 ENTRY(cpum.GstCtx.es.u32Limit),
8315 ENTRY(cpum.GstCtx.es.Attr),
8316 ENTRY(cpum.GstCtx.cs.Sel),
8317 ENTRY(cpum.GstCtx.cs.u64Base),
8318 ENTRY(cpum.GstCtx.cs.u32Limit),
8319 ENTRY(cpum.GstCtx.cs.Attr),
8320 ENTRY(cpum.GstCtx.ss.Sel),
8321 ENTRY(cpum.GstCtx.ss.u64Base),
8322 ENTRY(cpum.GstCtx.ss.u32Limit),
8323 ENTRY(cpum.GstCtx.ss.Attr),
8324 ENTRY(cpum.GstCtx.ds.Sel),
8325 ENTRY(cpum.GstCtx.ds.u64Base),
8326 ENTRY(cpum.GstCtx.ds.u32Limit),
8327 ENTRY(cpum.GstCtx.ds.Attr),
8328 ENTRY(cpum.GstCtx.fs.Sel),
8329 ENTRY(cpum.GstCtx.fs.u64Base),
8330 ENTRY(cpum.GstCtx.fs.u32Limit),
8331 ENTRY(cpum.GstCtx.fs.Attr),
8332 ENTRY(cpum.GstCtx.gs.Sel),
8333 ENTRY(cpum.GstCtx.gs.u64Base),
8334 ENTRY(cpum.GstCtx.gs.u32Limit),
8335 ENTRY(cpum.GstCtx.gs.Attr),
8336 ENTRY(cpum.GstCtx.rip),
8337 ENTRY(cpum.GstCtx.eflags),
8338 ENTRY(cpum.GstCtx.uRipInhibitInt),
8339 ENTRY(cpum.GstCtx.cr0),
8340 ENTRY(cpum.GstCtx.cr4),
8341 ENTRY(cpum.GstCtx.aXcr[0]),
8342 ENTRY(cpum.GstCtx.aXcr[1]),
8343#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8344 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8345 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8346 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8347 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8348 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8349 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8350 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8351 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8352 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8353 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8354 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8355 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8356 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8357 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8358 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8359 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8360 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8361 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8362 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8363 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8364 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8365 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8366 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8367 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8368 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8369 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8370 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8371 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8372 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8373 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8374 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8375 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8376 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8377#endif
8378#undef ENTRY
8379 };
8380#ifdef VBOX_STRICT
8381 static bool s_fOrderChecked = false;
8382 if (!s_fOrderChecked)
8383 {
8384 s_fOrderChecked = true;
8385 uint32_t offPrev = s_aMembers[0].off;
8386 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8387 {
8388 Assert(s_aMembers[i].off > offPrev);
8389 offPrev = s_aMembers[i].off;
8390 }
8391 }
8392#endif
8393
8394 /*
8395 * Binary lookup.
8396 */
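/* Exact-match binary search over the sorted s_aMembers table; offsets that
   do not hit a member exactly break out of the loop and, apart from the
   threaded-function statistics range handled below, return NULL. */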
8397 unsigned iStart = 0;
8398 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8399 for (;;)
8400 {
8401 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8402 uint32_t const offCur = s_aMembers[iCur].off;
8403 if (off < offCur)
8404 {
8405 if (iCur != iStart)
8406 iEnd = iCur;
8407 else
8408 break;
8409 }
8410 else if (off > offCur)
8411 {
8412 if (iCur + 1 < iEnd)
8413 iStart = iCur + 1;
8414 else
8415 break;
8416 }
8417 else
8418 return s_aMembers[iCur].pszName;
8419 }
8420#ifdef VBOX_WITH_STATISTICS
8421 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8422 return "iem.s.acThreadedFuncStats[iFn]";
8423#endif
8424 return NULL;
8425}
8426
8427
8428DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8429{
8430 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8431#if defined(RT_ARCH_AMD64)
8432 static const char * const a_apszMarkers[] =
8433 {
8434 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8435 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8436 };
8437#endif
8438
8439 char szDisBuf[512];
8440 DISSTATE Dis;
8441 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8442 uint32_t const cNative = pTb->Native.cInstructions;
8443 uint32_t offNative = 0;
8444#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8445 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8446#endif
8447 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8448 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8449 : DISCPUMODE_64BIT;
8450#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8451 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8452#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8453 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8454#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8455# error "Port me"
8456#else
8457 csh hDisasm = ~(size_t)0;
8458# if defined(RT_ARCH_AMD64)
8459 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8460# elif defined(RT_ARCH_ARM64)
8461 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8462# else
8463# error "Port me"
8464# endif
8465 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8466
8467 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8468 //Assert(rcCs == CS_ERR_OK);
8469#endif
8470
8471 /*
8472 * Print TB info.
8473 */
8474 pHlp->pfnPrintf(pHlp,
8475 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8476 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8477 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8478 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8479#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8480 if (pDbgInfo && pDbgInfo->cEntries > 1)
8481 {
8482 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8483
8484 /*
8485 * This disassembly is driven by the debug info which follows the native
8486 * code and indicates where the next guest instruction starts, where
8487 * labels are, and such things.
8488 */
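/* Each kIemTbDbgEntryType_NativeOffset entry gives the native instruction
   offset at which the following debug entries take effect (offDbgNativeNext);
   the loop below interleaves those entries with the native disassembly. */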
8489 uint32_t idxThreadedCall = 0;
8490 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8491 uint8_t idxRange = UINT8_MAX;
8492 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8493 uint32_t offRange = 0;
8494 uint32_t offOpcodes = 0;
8495 uint32_t const cbOpcodes = pTb->cbOpcodes;
8496 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8497 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8498 uint32_t iDbgEntry = 1;
8499 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8500
8501 while (offNative < cNative)
8502 {
8503 /* If we're at or have passed the point where the next chunk of debug
8504 info starts, process it. */
8505 if (offDbgNativeNext <= offNative)
8506 {
8507 offDbgNativeNext = UINT32_MAX;
8508 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8509 {
8510 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8511 {
8512 case kIemTbDbgEntryType_GuestInstruction:
8513 {
8514 /* Did the exec flag change? */
8515 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8516 {
8517 pHlp->pfnPrintf(pHlp,
8518 " fExec change %#08x -> %#08x %s\n",
8519 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8520 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8521 szDisBuf, sizeof(szDisBuf)));
8522 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8523 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8524 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8525 : DISCPUMODE_64BIT;
8526 }
8527
8528 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8529 where the compilation was aborted before the opcode was recorded and the actual
8530 instruction was translated to a threaded call. This may happen when we run out
8531 of ranges, or when some complicated interrupts/FFs are found to be pending or
8532 similar. So, we just deal with it here rather than in the compiler code as it
8533 is a lot simpler to do here. */
8534 if ( idxRange == UINT8_MAX
8535 || idxRange >= cRanges
8536 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8537 {
8538 idxRange += 1;
8539 if (idxRange < cRanges)
8540 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8541 else
8542 continue;
8543 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8544 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8545 + (pTb->aRanges[idxRange].idxPhysPage == 0
8546 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8547 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8548 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8549 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8550 pTb->aRanges[idxRange].idxPhysPage);
8551 GCPhysPc += offRange;
8552 }
8553
8554 /* Disassemble the instruction. */
8555 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8556 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8557 uint32_t cbInstr = 1;
8558 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8559 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8560 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8561 if (RT_SUCCESS(rc))
8562 {
8563 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8564 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8565 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8566 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8567
8568 static unsigned const s_offMarker = 55;
8569 static char const s_szMarker[] = " ; <--- guest";
8570 if (cch < s_offMarker)
8571 {
8572 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8573 cch = s_offMarker;
8574 }
8575 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8576 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8577
8578 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8579 }
8580 else
8581 {
8582 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8583 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8584 cbInstr = 1;
8585 }
8586 GCPhysPc += cbInstr;
8587 offOpcodes += cbInstr;
8588 offRange += cbInstr;
8589 continue;
8590 }
8591
8592 case kIemTbDbgEntryType_ThreadedCall:
8593 pHlp->pfnPrintf(pHlp,
8594 " Call #%u to %s (%u args) - %s\n",
8595 idxThreadedCall,
8596 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8597 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8598 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8599 idxThreadedCall++;
8600 continue;
8601
8602 case kIemTbDbgEntryType_GuestRegShadowing:
8603 {
8604 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8605 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8606 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8607 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8608 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8609 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8610 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8611 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8612 else
8613 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8614 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8615 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8616 continue;
8617 }
8618
8619#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8620 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8621 {
8622 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8623 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8624 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8625 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8626 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8627 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8628 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8629 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8630 else
8631 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8632 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8633 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8634 continue;
8635 }
8636#endif
8637
8638 case kIemTbDbgEntryType_Label:
8639 {
8640 const char *pszName = "what_the_fudge";
8641 const char *pszComment = "";
8642 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8643 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8644 {
8645 case kIemNativeLabelType_Return: pszName = "Return"; break;
8646 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8647 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8648 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8649 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8650 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8651 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8652 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8653 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8654 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8655 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8656 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8657 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8658 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8659 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8660 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8661 case kIemNativeLabelType_If:
8662 pszName = "If";
8663 fNumbered = true;
8664 break;
8665 case kIemNativeLabelType_Else:
8666 pszName = "Else";
8667 fNumbered = true;
8668 pszComment = " ; regs state restored pre-if-block";
8669 break;
8670 case kIemNativeLabelType_Endif:
8671 pszName = "Endif";
8672 fNumbered = true;
8673 break;
8674 case kIemNativeLabelType_CheckIrq:
8675 pszName = "CheckIrq_CheckVM";
8676 fNumbered = true;
8677 break;
8678 case kIemNativeLabelType_TlbLookup:
8679 pszName = "TlbLookup";
8680 fNumbered = true;
8681 break;
8682 case kIemNativeLabelType_TlbMiss:
8683 pszName = "TlbMiss";
8684 fNumbered = true;
8685 break;
8686 case kIemNativeLabelType_TlbDone:
8687 pszName = "TlbDone";
8688 fNumbered = true;
8689 break;
8690 case kIemNativeLabelType_Invalid:
8691 case kIemNativeLabelType_End:
8692 break;
8693 }
8694 if (fNumbered)
8695 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8696 else
8697 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8698 continue;
8699 }
8700
8701 case kIemTbDbgEntryType_NativeOffset:
8702 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8703 Assert(offDbgNativeNext >= offNative);
8704 break;
8705
8706#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8707 case kIemTbDbgEntryType_DelayedPcUpdate:
8708 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8709 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8710 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8711 continue;
8712#endif
8713
8714#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8715 case kIemTbDbgEntryType_GuestRegDirty:
8716 {
8717 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8718 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
8719 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
8720 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
8721 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
8722 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
8723 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
8724 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
8725 pszGstReg, pszHstReg);
8726 continue;
8727 }
8728
8729 case kIemTbDbgEntryType_GuestRegWriteback:
8730 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
8731 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
8732 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
8733 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
8734 continue;
8735#endif
8736
8737 default:
8738 AssertFailed();
8739 }
8740 iDbgEntry++;
8741 break;
8742 }
8743 }
8744
8745 /*
8746 * Disassemble the next native instruction.
8747 */
8748 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8749# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8750 uint32_t cbInstr = sizeof(paNative[0]);
8751 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8752 if (RT_SUCCESS(rc))
8753 {
8754# if defined(RT_ARCH_AMD64)
8755 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8756 {
8757 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8758 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8759 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8760 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8761 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8762 uInfo & 0x8000 ? "recompiled" : "todo");
8763 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8764 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8765 else
8766 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8767 }
8768 else
8769# endif
8770 {
8771 const char *pszAnnotation = NULL;
8772# ifdef RT_ARCH_AMD64
8773 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8774 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8775 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8776 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8777 PCDISOPPARAM pMemOp;
8778 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8779 pMemOp = &Dis.Param1;
8780 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8781 pMemOp = &Dis.Param2;
8782 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8783 pMemOp = &Dis.Param3;
8784 else
8785 pMemOp = NULL;
8786 if ( pMemOp
8787 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8788 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8789 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8790 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8791
8792#elif defined(RT_ARCH_ARM64)
8793 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8794 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8795 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8796# else
8797# error "Port me"
8798# endif
8799 if (pszAnnotation)
8800 {
8801 static unsigned const s_offAnnotation = 55;
8802 size_t const cchAnnotation = strlen(pszAnnotation);
8803 size_t cchDis = strlen(szDisBuf);
8804 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8805 {
8806 if (cchDis < s_offAnnotation)
8807 {
8808 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8809 cchDis = s_offAnnotation;
8810 }
8811 szDisBuf[cchDis++] = ' ';
8812 szDisBuf[cchDis++] = ';';
8813 szDisBuf[cchDis++] = ' ';
8814 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8815 }
8816 }
8817 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8818 }
8819 }
8820 else
8821 {
8822# if defined(RT_ARCH_AMD64)
8823 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8824 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8825# elif defined(RT_ARCH_ARM64)
8826 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8827# else
8828# error "Port me"
8829# endif
8830 cbInstr = sizeof(paNative[0]);
8831 }
8832 offNative += cbInstr / sizeof(paNative[0]);
8833
8834# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8835 cs_insn *pInstr;
8836 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8837 (uintptr_t)pNativeCur, 1, &pInstr);
8838 if (cInstrs > 0)
8839 {
8840 Assert(cInstrs == 1);
8841 const char *pszAnnotation = NULL;
8842# if defined(RT_ARCH_ARM64)
8843 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8844 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8845 {
8846 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8847 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8848 char *psz = strchr(pInstr->op_str, '[');
8849 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8850 {
8851 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8852 int32_t off = -1;
8853 psz += 4;
8854 if (*psz == ']')
8855 off = 0;
8856 else if (*psz == ',')
8857 {
8858 psz = RTStrStripL(psz + 1);
8859 if (*psz == '#')
8860 off = RTStrToInt32(&psz[1]);
8861 /** @todo deal with index registers and LSL as well... */
8862 }
8863 if (off >= 0)
8864 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8865 }
8866 }
8867# endif
8868
8869 size_t const cchOp = strlen(pInstr->op_str);
8870# if defined(RT_ARCH_AMD64)
8871 if (pszAnnotation)
8872 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8873 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8874 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8875 else
8876 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8877 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8878
8879# else
8880 if (pszAnnotation)
8881 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8882 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8883 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8884 else
8885 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8886 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8887# endif
8888 offNative += pInstr->size / sizeof(*pNativeCur);
8889 cs_free(pInstr, cInstrs);
8890 }
8891 else
8892 {
8893# if defined(RT_ARCH_AMD64)
8894 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8895 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8896# else
8897 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8898# endif
8899 offNative++;
8900 }
8901# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8902 }
8903 }
8904 else
8905#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8906 {
8907 /*
8908 * No debug info, just disassemble the x86 code and then the native code.
8909 *
8910 * First the guest code:
8911 */
8912 for (unsigned i = 0; i < pTb->cRanges; i++)
8913 {
8914 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8915 + (pTb->aRanges[i].idxPhysPage == 0
8916 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8917 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8918 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8919 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8920 unsigned off = pTb->aRanges[i].offOpcodes;
8921 /** @todo this ain't working when crossing pages! */
8922 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8923 while (off < cbOpcodes)
8924 {
8925 uint32_t cbInstr = 1;
8926 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8927 &pTb->pabOpcodes[off], cbOpcodes - off,
8928 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8929 if (RT_SUCCESS(rc))
8930 {
8931 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8932 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8933 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8934 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8935 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8936 GCPhysPc += cbInstr;
8937 off += cbInstr;
8938 }
8939 else
8940 {
8941 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8942 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8943 break;
8944 }
8945 }
8946 }
8947
8948 /*
8949 * Then the native code:
8950 */
8951 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8952 while (offNative < cNative)
8953 {
8954 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8955# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8956 uint32_t cbInstr = sizeof(paNative[0]);
8957 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8958 if (RT_SUCCESS(rc))
8959 {
8960# if defined(RT_ARCH_AMD64)
8961 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8962 {
8963 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8964 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8965 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8966 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8967 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8968 uInfo & 0x8000 ? "recompiled" : "todo");
8969 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8970 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8971 else
8972 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8973 }
8974 else
8975# endif
8976 {
8977# ifdef RT_ARCH_AMD64
8978 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8979 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8980 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8981 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8982# elif defined(RT_ARCH_ARM64)
8983 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8984 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8985 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8986# else
8987# error "Port me"
8988# endif
8989 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8990 }
8991 }
8992 else
8993 {
8994# if defined(RT_ARCH_AMD64)
8995 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8996 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8997# else
8998 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8999# endif
9000 cbInstr = sizeof(paNative[0]);
9001 }
9002 offNative += cbInstr / sizeof(paNative[0]);
9003
9004# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9005 cs_insn *pInstr;
9006 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9007 (uintptr_t)pNativeCur, 1, &pInstr);
9008 if (cInstrs > 0)
9009 {
9010 Assert(cInstrs == 1);
9011# if defined(RT_ARCH_AMD64)
9012 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9013 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9014# else
9015 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9016 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9017# endif
9018 offNative += pInstr->size / sizeof(*pNativeCur);
9019 cs_free(pInstr, cInstrs);
9020 }
9021 else
9022 {
9023# if defined(RT_ARCH_AMD64)
9024 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9025 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9026# else
9027 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9028# endif
9029 offNative++;
9030 }
9031# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9032 }
9033 }
9034
9035#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9036 /* Cleanup. */
9037 cs_close(&hDisasm);
9038#endif
9039}
9040
9041
9042/**
9043 * Recompiles the given threaded TB into a native one.
9044 *
9045 * In case of failure the translation block will be returned as-is.
9046 *
9047 * @returns pTb.
9048 * @param pVCpu The cross context virtual CPU structure of the calling
9049 * thread.
9050 * @param pTb The threaded translation to recompile to native.
9051 */
9052DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9053{
9054#if 0 /* For profiling the native recompiler code. */
9055l_profile_again:
9056#endif
9057 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9058
9059 /*
9060 * The first time thru we allocate the recompiler state; the other times
9061 * we just need to reset it before using it again.
9062 */
9063 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9064 if (RT_LIKELY(pReNative))
9065 iemNativeReInit(pReNative, pTb);
9066 else
9067 {
9068 pReNative = iemNativeInit(pVCpu, pTb);
9069 AssertReturn(pReNative, pTb);
9070 }
9071
9072#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9073 /*
9074 * First do liveness analysis. This is done backwards.
9075 */
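/* The final entry is seeded as 'unused' and every earlier entry is derived
   from the one following it, so each call's liveness state reflects how the
   later calls in the TB end up using the guest registers. */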
9076 {
9077 uint32_t idxCall = pTb->Thrd.cCalls;
9078 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9079 { /* likely */ }
9080 else
9081 {
9082 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9083 while (idxCall > cAlloc)
9084 cAlloc *= 2;
9085 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9086 AssertReturn(pvNew, pTb);
9087 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9088 pReNative->cLivenessEntriesAlloc = cAlloc;
9089 }
9090 AssertReturn(idxCall > 0, pTb);
9091 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9092
9093 /* The initial (final) entry. */
9094 idxCall--;
9095 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9096
9097 /* Loop backwards thru the calls and fill in the other entries. */
9098 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9099 while (idxCall > 0)
9100 {
9101 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9102 if (pfnLiveness)
9103 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9104 else
9105 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9106 pCallEntry--;
9107 idxCall--;
9108 }
9109
9110# ifdef VBOX_WITH_STATISTICS
9111 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9112 to 'clobbered' rather than 'input'. */
9113 /** @todo */
9114# endif
9115 }
9116#endif
9117
9118 /*
9119 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9120 * for aborting if an error happens.
9121 */
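/* IEMNATIVE_TRY_SETJMP / IEMNATIVE_CATCH_LONGJMP_BEGIN below presumably expand
   to one of those two mechanisms depending on the build configuration; emitter
   helpers abort via IEMNATIVE_DO_LONGJMP with a VERR_IEM_XXX status that ends
   up in rc and is logged in the catch block at the end. */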
9122 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9123#ifdef LOG_ENABLED
9124 uint32_t const cCallsOrg = cCallsLeft;
9125#endif
9126 uint32_t off = 0;
9127 int rc = VINF_SUCCESS;
9128 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9129 {
9130#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
9131 /*
9132 * Emit prolog code (fixed).
9133 */
9134 off = iemNativeEmitProlog(pReNative, off);
9135#endif
9136
9137 /*
9138 * Convert the calls to native code.
9139 */
9140#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9141 int32_t iGstInstr = -1;
9142#endif
9143#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9144 uint32_t cThreadedCalls = 0;
9145 uint32_t cRecompiledCalls = 0;
9146#endif
9147#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9148 uint32_t idxCurCall = 0;
9149#endif
9150 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9151 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9152 while (cCallsLeft-- > 0)
9153 {
9154 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9155#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9156 pReNative->idxCurCall = idxCurCall;
9157#endif
9158
9159 /*
9160 * Debug info, assembly markup and statistics.
9161 */
9162#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9163 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9164 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9165#endif
9166#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9167 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9168 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9169 {
9170 if (iGstInstr < (int32_t)pTb->cInstructions)
9171 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9172 else
9173 Assert(iGstInstr == pTb->cInstructions);
9174 iGstInstr = pCallEntry->idxInstr;
9175 }
9176 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9177#endif
9178#if defined(VBOX_STRICT)
9179 off = iemNativeEmitMarker(pReNative, off,
9180 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9181#endif
9182#if defined(VBOX_STRICT)
9183 iemNativeRegAssertSanity(pReNative);
9184#endif
9185#ifdef VBOX_WITH_STATISTICS
9186 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9187#endif
9188
9189 /*
9190 * Actual work.
9191 */
9192 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9193 pfnRecom ? "(recompiled)" : "(todo)"));
9194 if (pfnRecom) /** @todo stats on this. */
9195 {
9196 off = pfnRecom(pReNative, off, pCallEntry);
9197 STAM_REL_STATS({cRecompiledCalls++;});
9198 }
9199 else
9200 {
9201 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9202 STAM_REL_STATS({cThreadedCalls++;});
9203 }
9204 Assert(off <= pReNative->cInstrBufAlloc);
9205 Assert(pReNative->cCondDepth == 0);
9206
9207#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9208 if (LogIs2Enabled())
9209 {
9210 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9211# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9212 static const char s_achState[] = "CUXI";
9213# else
9214 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9215# endif
9216
9217 char szGpr[17];
9218 for (unsigned i = 0; i < 16; i++)
9219 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9220 szGpr[16] = '\0';
9221
9222 char szSegBase[X86_SREG_COUNT + 1];
9223 char szSegLimit[X86_SREG_COUNT + 1];
9224 char szSegAttrib[X86_SREG_COUNT + 1];
9225 char szSegSel[X86_SREG_COUNT + 1];
9226 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9227 {
9228 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9229 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9230 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9231 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9232 }
9233 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9234 = szSegSel[X86_SREG_COUNT] = '\0';
9235
9236 char szEFlags[8];
9237 for (unsigned i = 0; i < 7; i++)
9238 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9239 szEFlags[7] = '\0';
9240
9241 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9242 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9243 }
9244#endif
9245
9246 /*
9247 * Advance.
9248 */
9249 pCallEntry++;
9250#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9251 idxCurCall++;
9252#endif
9253 }
9254
9255 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9256 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9257 if (!cThreadedCalls)
9258 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9259
9260#ifdef VBOX_WITH_STATISTICS
9261 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
9262#endif
9263
9264 /*
9265 * Emit the epilog code.
9266 */
9267 uint32_t idxReturnLabel;
9268 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9269
9270 /*
9271 * Generate special jump labels.
9272 */
9273 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9274 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9275 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9276 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9277
9278 /*
9279 * Generate simple TB tail labels that just call a helper with a pVCpu
9280 * arg and either return or longjmp/throw a non-zero status.
9281 *
9282 * The array entries must be ordered by enmLabel value so we can index
9283 * using fTailLabels bit numbers.
9284 */
9285 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9286 static struct
9287 {
9288 IEMNATIVELABELTYPE enmLabel;
9289 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9290 } const g_aSimpleTailLabels[] =
9291 {
9292 { kIemNativeLabelType_Invalid, NULL },
9293 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9294 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9295 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9296 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9297 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9298 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9299 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9300 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9301 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9302 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9303 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9304 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9305 };
9306
9307 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9308 AssertCompile(kIemNativeLabelType_Invalid == 0);
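/* The mask below covers label bits 1..LastSimple only: subtracting 2 rather
   than 1 from RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) drops bit 0,
   which is kIemNativeLabelType_Invalid per the AssertCompile above. */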
9309 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9310 if (fTailLabels)
9311 {
9312 do
9313 {
9314 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9315 fTailLabels &= ~RT_BIT_64(enmLabel);
9316 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9317
9318 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9319 Assert(idxLabel != UINT32_MAX);
9320 if (idxLabel != UINT32_MAX)
9321 {
9322 iemNativeLabelDefine(pReNative, idxLabel, off);
9323
9324 /* int pfnCallback(PVMCPUCC pVCpu) */
9325 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9326 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9327
9328 /* jump back to the return sequence. */
9329 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9330 }
9331
9332 } while (fTailLabels);
9333 }
9334 }
9335 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9336 {
9337 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9338 return pTb;
9339 }
9340 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9341 Assert(off <= pReNative->cInstrBufAlloc);
9342
9343 /*
9344 * Make sure all labels have been defined.
9345 */
9346 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9347#ifdef VBOX_STRICT
9348 uint32_t const cLabels = pReNative->cLabels;
9349 for (uint32_t i = 0; i < cLabels; i++)
9350 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9351#endif
9352
9353#if 0 /* For profiling the native recompiler code. */
9354 if (pTb->Thrd.cCalls >= 136)
9355 {
9356 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9357 goto l_profile_again;
9358 }
9359#endif
9360
9361 /*
9362 * Allocate executable memory, copy over the code we've generated.
9363 */
9364 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9365 if (pTbAllocator->pDelayedFreeHead)
9366 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9367
9368 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
9369 AssertReturn(paFinalInstrBuf, pTb);
9370 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9371
9372 /*
9373 * Apply fixups.
9374 */
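/* Each fixup patches a label-relative displacement into the final copy of the
   code: kIemNativeFixupType_Rel32 is the 32-bit relative displacement used on
   AMD64, while the ARM64 variants stuff a signed instruction-count displacement
   into the imm26, imm19 or imm14 field of the already emitted branch. */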
9375 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9376 uint32_t const cFixups = pReNative->cFixups;
9377 for (uint32_t i = 0; i < cFixups; i++)
9378 {
9379 Assert(paFixups[i].off < off);
9380 Assert(paFixups[i].idxLabel < cLabels);
9381 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9382 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9383 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9384 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9385 switch (paFixups[i].enmType)
9386 {
9387#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9388 case kIemNativeFixupType_Rel32:
9389 Assert(paFixups[i].off + 4 <= off);
9390 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9391 continue;
9392
9393#elif defined(RT_ARCH_ARM64)
9394 case kIemNativeFixupType_RelImm26At0:
9395 {
9396 Assert(paFixups[i].off < off);
9397 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9398 Assert(offDisp >= -262144 && offDisp < 262144);
9399 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9400 continue;
9401 }
9402
9403 case kIemNativeFixupType_RelImm19At5:
9404 {
9405 Assert(paFixups[i].off < off);
9406 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9407 Assert(offDisp >= -262144 && offDisp < 262144);
9408 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9409 continue;
9410 }
9411
9412 case kIemNativeFixupType_RelImm14At5:
9413 {
9414 Assert(paFixups[i].off < off);
9415 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9416 Assert(offDisp >= -8192 && offDisp < 8192);
9417 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9418 continue;
9419 }
9420
9421#endif
9422 case kIemNativeFixupType_Invalid:
9423 case kIemNativeFixupType_End:
9424 break;
9425 }
9426 AssertFailed();
9427 }
9428
9429 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9430 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9431
9432 /*
9433 * Convert the translation block.
9434 */
9435 RTMemFree(pTb->Thrd.paCalls);
9436 pTb->Native.paInstructions = paFinalInstrBuf;
9437 pTb->Native.cInstructions = off;
9438 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9439#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9440 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9441 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9442#endif
9443
9444 Assert(pTbAllocator->cThreadedTbs > 0);
9445 pTbAllocator->cThreadedTbs -= 1;
9446 pTbAllocator->cNativeTbs += 1;
9447 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9448
9449#ifdef LOG_ENABLED
9450 /*
9451 * Disassemble to the log if enabled.
9452 */
9453 if (LogIs3Enabled())
9454 {
9455 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9456 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9457# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9458 RTLogFlush(NULL);
9459# endif
9460 }
9461#endif
9462 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9463
9464 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9465 return pTb;
9466}
9467