VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 104115

Last change on this file since 104115 was 104115, checked in by vboxsync, 8 months ago

VMM/IEM: Split out the executable memory allocator from IEMAllN8veRecompiler.cpp and into a separate file, IEMAllN8veExecMem.cpp. bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 407.4 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104115 2024-03-29 02:11:56Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
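/*
 * Editor's note (illustrative, not part of the original source): the levels
 * above are selected through the regular VBox logging facility.  Assuming the
 * group string follows the usual lowercase-of-LOG_GROUP convention
 * ("iem_re_native"), a debug build could enable the recompile details and the
 * native disassembly roughly like this:
 *
 *     VBOX_LOG="+iem_re_native.l2.l3"
 *     VBOX_LOG_DEST="file=iemnative.log"
 *
 * Both the group string and the flag syntax are assumptions based on the
 * standard VBox log group naming, not something stated in this file.
 */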
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
70# include "/opt/local/include/capstone/capstone.h"
71#endif
72
73#include "IEMInline.h"
74#include "IEMThreadedFunctions.h"
75#include "IEMN8veRecompiler.h"
76#include "IEMN8veRecompilerEmit.h"
77#include "IEMN8veRecompilerTlbLookup.h"
78#include "IEMNativeFunctions.h"
79
80
81/*
82 * Narrow down configs here to avoid wasting time on unused configs.
83 * Note! Same checks in IEMAllThrdRecompiler.cpp.
84 */
85
86#ifndef IEM_WITH_CODE_TLB
87# error The code TLB must be enabled for the recompiler.
88#endif
89
90#ifndef IEM_WITH_DATA_TLB
91# error The data TLB must be enabled for the recompiler.
92#endif
93
94#ifndef IEM_WITH_SETJMP
95# error The setjmp approach must be enabled for the recompiler.
96#endif
97
98/** @todo eliminate this clang build hack. */
99#if RT_CLANG_PREREQ(4, 0)
100# pragma GCC diagnostic ignored "-Wunused-function"
101#endif
102
103
104/*********************************************************************************************************************************
105* Internal Functions *
106*********************************************************************************************************************************/
107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
108static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
109#endif
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
112 IEMNATIVEGSTREG enmGstReg, uint32_t off);
113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
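/*
 * Editor's illustrative sketch (not part of the original source): conceptually
 * the recompiled TB checks the strict status of every helper/CImpl call it
 * makes and funnels non-success cases through the helper above before leaving
 * the TB, roughly:
 *
 *     int rcStrict = pfnSomeThreadedFunc(pVCpu, ...);   // hypothetical call
 *     if (   rcStrict != VINF_SUCCESS
 *         || pVCpu->iem.s.rcPassUp != VINF_SUCCESS)
 *         return iemNativeHlpExecStatusCodeFiddling(pVCpu, rcStrict, idxInstr);
 *     // otherwise fall through to the next recompiled instruction
 *
 * The surrounding control flow is an assumption about the emitted code; the
 * actual check sequence is generated elsewhere in this file.
 */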
130
131
132/**
133 * Used by TB code when it wants to raise a \#DE.
134 */
135IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
136{
137 iemRaiseDivideErrorJmp(pVCpu);
138#ifndef _MSC_VER
139 return VINF_IEM_RAISED_XCPT; /* not reached */
140#endif
141}
142
143
144/**
145 * Used by TB code when it wants to raise a \#UD.
146 */
147IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
148{
149 iemRaiseUndefinedOpcodeJmp(pVCpu);
150#ifndef _MSC_VER
151 return VINF_IEM_RAISED_XCPT; /* not reached */
152#endif
153}
154
155
156/**
157 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
158 *
159 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
160 */
161IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
162{
163 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
164 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
165 iemRaiseUndefinedOpcodeJmp(pVCpu);
166 else
167 iemRaiseDeviceNotAvailableJmp(pVCpu);
168#ifndef _MSC_VER
169 return VINF_IEM_RAISED_XCPT; /* not reached */
170#endif
171}
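/*
 * Editor's note (illustrative, not part of the original source): the check
 * above distinguishes the two architectural outcomes for SSE instructions:
 *
 *     CR0.EM == 1 || CR4.OSFXSR == 0   -> #UD   (SSE not usable)
 *     otherwise                        -> #NM   (presumably CR0.TS pending)
 *
 * The #NM interpretation assumes the helper is only reached once the emitted
 * inline checks have decided that some exception must be raised.
 */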
172
173
174/**
175 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
176 *
177 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
178 */
179IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
180{
181 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
182 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
183 iemRaiseUndefinedOpcodeJmp(pVCpu);
184 else
185 iemRaiseDeviceNotAvailableJmp(pVCpu);
186#ifndef _MSC_VER
187 return VINF_IEM_RAISED_XCPT; /* not reached */
188#endif
189}
190
191
192/**
193 * Used by TB code when it wants to raise a \#UD or \#XF for an SSE/AVX floating point exception.
194 *
195 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
196 */
197IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
198{
199 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
200 iemRaiseSimdFpExceptionJmp(pVCpu);
201 else
202 iemRaiseUndefinedOpcodeJmp(pVCpu);
203#ifndef _MSC_VER
204 return VINF_IEM_RAISED_XCPT; /* not reached */
205#endif
206}
207
208
209/**
210 * Used by TB code when it wants to raise a \#NM.
211 */
212IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
213{
214 iemRaiseDeviceNotAvailableJmp(pVCpu);
215#ifndef _MSC_VER
216 return VINF_IEM_RAISED_XCPT; /* not reached */
217#endif
218}
219
220
221/**
222 * Used by TB code when it wants to raise a \#GP(0).
223 */
224IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
225{
226 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
227#ifndef _MSC_VER
228 return VINF_IEM_RAISED_XCPT; /* not reached */
229#endif
230}
231
232
233/**
234 * Used by TB code when it wants to raise a \#MF.
235 */
236IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
237{
238 iemRaiseMathFaultJmp(pVCpu);
239#ifndef _MSC_VER
240 return VINF_IEM_RAISED_XCPT; /* not reached */
241#endif
242}
243
244
245/**
246 * Used by TB code when it wants to raise a \#XF.
247 */
248IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
249{
250 iemRaiseSimdFpExceptionJmp(pVCpu);
251#ifndef _MSC_VER
252 return VINF_IEM_RAISED_XCPT; /* not reached */
253#endif
254}
255
256
257/**
258 * Used by TB code when detecting opcode changes.
259 * @see iemThreadedFuncWorkerObsoleteTb
260 */
261IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
262{
263 /* We set fSafeToFree to false because we're being called in the context
264 of a TB callback function, which for native TBs means we cannot release
265 the executable memory till we've returned our way back to iemTbExec, as
266 that return path goes via the native code generated for the TB. */
267 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
268 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
269 return VINF_IEM_REEXEC_BREAK;
270}
271
272
273/**
274 * Used by TB code when we need to switch to a TB with CS.LIM checking.
275 */
276IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
277{
278 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
279 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
280 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
281 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
282 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
283 return VINF_IEM_REEXEC_BREAK;
284}
285
286
287/**
288 * Used by TB code when we missed a PC check after a branch.
289 */
290IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
291{
292 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
293 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
294 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
295 pVCpu->iem.s.pbInstrBuf));
296 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
297 return VINF_IEM_REEXEC_BREAK;
298}
299
300
301
302/*********************************************************************************************************************************
303* Helpers: Segmented memory fetches and stores. *
304*********************************************************************************************************************************/
305
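/*
 * Editor's note (illustrative, not part of the original source): every helper
 * in this and the following sections follows the same pattern.  When the
 * recompiler emits an inline TLB lookup (IEMNATIVE_WITH_TLB_LOOKUP_FETCH,
 * _STORE, _PUSH, _POP, _MAPPED), the helper is only reached on the slow path
 * and therefore calls the *SafeJmp worker; otherwise it is the only path and
 * calls the regular *Jmp worker.  A compressed sketch of the common shape,
 * using hypothetical Uxx placeholders:
 *
 *     IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataUxxSketch,
 *                             (PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
 *     {
 *     #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
 *         return (uint64_t)iemMemFetchDataUxxSafeJmp(pVCpu, iSegReg, GCPtrMem); // TLB-miss slow path
 *     #else
 *         return (uint64_t)iemMemFetchDataUxxJmp(pVCpu, iSegReg, GCPtrMem);     // full access path
 *     #endif
 *     }
 *
 * The "slow path only" interpretation is inferred from the SafeJmp naming and
 * the TLB lookup emitter headers included above, not stated explicitly here.
 */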
306/**
307 * Used by TB code to load unsigned 8-bit data w/ segmentation.
308 */
309IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
310{
311#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
312 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
313#else
314 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
315#endif
316}
317
318
319/**
320 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
321 * to 16 bits.
322 */
323IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
324{
325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
326 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
327#else
328 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
329#endif
330}
331
332
333/**
334 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
335 * to 32 bits.
336 */
337IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
338{
339#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
340 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
341#else
342 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
343#endif
344}
345
346/**
347 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
348 * to 64 bits.
349 */
350IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
351{
352#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
353 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
354#else
355 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
356#endif
357}
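/*
 * Editor's note (illustrative, not part of the original source): the nested
 * casts in the _Sx_ helpers first sign-extend the fetched byte to the target
 * width and then zero-extend the result to 64 bits for the return register.
 * Worked example for a fetched byte of 0x80 (i.e. -128):
 *
 *     _Sx_U16: (uint64_t)(uint16_t)(int16_t)(int8_t)0x80 == UINT64_C(0x000000000000ff80)
 *     _Sx_U32: (uint64_t)(uint32_t)(int32_t)(int8_t)0x80 == UINT64_C(0x00000000ffffff80)
 *     _Sx_U64: (uint64_t)(int64_t)(int8_t)0x80           == UINT64_C(0xffffffffffffff80)
 */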
358
359
360/**
361 * Used by TB code to load unsigned 16-bit data w/ segmentation.
362 */
363IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
364{
365#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
366 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
367#else
368 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
369#endif
370}
371
372
373/**
374 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
375 * to 32 bits.
376 */
377IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
378{
379#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
380 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
381#else
382 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
383#endif
384}
385
386
387/**
388 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
389 * to 64 bits.
390 */
391IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
392{
393#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
394 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
395#else
396 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
397#endif
398}
399
400
401/**
402 * Used by TB code to load unsigned 32-bit data w/ segmentation.
403 */
404IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
405{
406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
407 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
408#else
409 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
410#endif
411}
412
413
414/**
415 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
416 * to 64 bits.
417 */
418IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
419{
420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
421 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
422#else
423 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
424#endif
425}
426
427
428/**
429 * Used by TB code to load unsigned 64-bit data w/ segmentation.
430 */
431IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
432{
433#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
434 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
435#else
436 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
437#endif
438}
439
440
441#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
442/**
443 * Used by TB code to load 128-bit data w/ segmentation.
444 */
445IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
446{
447#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
448 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
449#else
450 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
451#endif
452}
453
454
455/**
456 * Used by TB code to load 128-bit data w/ segmentation.
457 */
458IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
459{
460#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
461 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
462#else
463 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
464#endif
465}
466
467
468/**
469 * Used by TB code to load 128-bit data w/ segmentation.
470 */
471IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
472{
473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
474 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
475#else
476 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
477#endif
478}
479
480
481/**
482 * Used by TB code to load 256-bit data w/ segmentation.
483 */
484IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
485{
486#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
487 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
488#else
489 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
490#endif
491}
492
493
494/**
495 * Used by TB code to load 256-bit data w/ segmentation.
496 */
497IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
498{
499#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
500 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
501#else
502 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
503#endif
504}
505#endif
506
507
508/**
509 * Used by TB code to store unsigned 8-bit data w/ segmentation.
510 */
511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
512{
513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
514 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
515#else
516 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
517#endif
518}
519
520
521/**
522 * Used by TB code to store unsigned 16-bit data w/ segmentation.
523 */
524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
525{
526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
527 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
528#else
529 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
530#endif
531}
532
533
534/**
535 * Used by TB code to store unsigned 32-bit data w/ segmentation.
536 */
537IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
538{
539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
540 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
541#else
542 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
543#endif
544}
545
546
547/**
548 * Used by TB code to store unsigned 64-bit data w/ segmentation.
549 */
550IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
551{
552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
553 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
554#else
555 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
556#endif
557}
558
559
560#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
561/**
562 * Used by TB code to store unsigned 128-bit data w/ segmentation.
563 */
564IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
565{
566#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
567 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
568#else
569 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
570#endif
571}
572
573
574/**
575 * Used by TB code to store unsigned 128-bit data w/ segmentation.
576 */
577IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
578{
579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
580 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
581#else
582 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
583#endif
584}
585
586
587/**
588 * Used by TB code to store unsigned 256-bit data w/ segmentation.
589 */
590IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
591{
592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
593 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
594#else
595 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
596#endif
597}
598
599
600/**
601 * Used by TB code to store unsigned 256-bit data w/ segmentation.
602 */
603IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
604{
605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
606 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
607#else
608 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
609#endif
610}
611#endif
612
613
614
615/**
616 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
617 */
618IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
619{
620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
621 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
622#else
623 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
624#endif
625}
626
627
628/**
629 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
630 */
631IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
632{
633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
634 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
635#else
636 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
637#endif
638}
639
640
641/**
642 * Used by TB code to store a 32-bit selector value onto a generic stack.
643 *
644 * Intel CPUs don't write a whole dword, hence the special function.
645 */
646IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
647{
648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
649 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
650#else
651 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
652#endif
653}
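/*
 * Editor's note (illustrative, not part of the original source): the SReg
 * variant exists because a segment-register push with a 32-bit operand size
 * only writes the low 16 bits on Intel CPUs, leaving the upper word of the
 * stack slot untouched.  Sketch of the observable difference when pushing
 * selector 0x0023 over a slot currently holding 0xaabbccdd:
 *
 *     plain U32 store  : slot becomes 0x00000023   (upper word overwritten)
 *     SReg-aware store : slot becomes 0xaabb0023   (upper word preserved)
 *
 * Which behaviour applies depends on the CPU being emulated; this is only a
 * summary of why the dedicated helper exists.
 */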
654
655
656/**
657 * Used by TB code to push an unsigned 64-bit value onto a generic stack.
658 */
659IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
660{
661#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
662 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
663#else
664 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
665#endif
666}
667
668
669/**
670 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
671 */
672IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
673{
674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
675 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
676#else
677 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
678#endif
679}
680
681
682/**
683 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
684 */
685IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
686{
687#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
688 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
689#else
690 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
691#endif
692}
693
694
695/**
696 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
697 */
698IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
699{
700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
701 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
702#else
703 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
704#endif
705}
706
707
708
709/*********************************************************************************************************************************
710* Helpers: Flat memory fetches and stores. *
711*********************************************************************************************************************************/
712
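/*
 * Editor's note (illustrative, not part of the original source): the flat
 * variants pass UINT8_MAX as the segment register index to the shared
 * *SafeJmp workers, which IEM appears to treat as "the address is already
 * flat, apply no segmentation".  Under that assumption, with a zero-based DS
 * the two entry points produce the same result:
 *
 *     uint64_t uViaSeg  = iemNativeHlpMemFetchDataU8(pVCpu, GCPtrMem, X86_SREG_DS);
 *     uint64_t uViaFlat = iemNativeHlpMemFlatFetchDataU8(pVCpu, GCPtrMem);
 *     Assert(uViaSeg == uViaFlat); // holds when DS.BASE == 0 and the limit permits the access
 */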
713/**
714 * Used by TB code to load unsigned 8-bit data w/ flat address.
715 * @note Zero extending the value to 64-bit to simplify assembly.
716 */
717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
721#else
722 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
729 * to 16 bits.
730 * @note Zero extending the value to 64-bit to simplify assembly.
731 */
732IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
733{
734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
735 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
736#else
737 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
738#endif
739}
740
741
742/**
743 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
744 * to 32 bits.
745 * @note Zero extending the value to 64-bit to simplify assembly.
746 */
747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
748{
749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
750 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
751#else
752 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
753#endif
754}
755
756
757/**
758 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
759 * to 64 bits.
760 */
761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
762{
763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
764 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
765#else
766 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
767#endif
768}
769
770
771/**
772 * Used by TB code to load unsigned 16-bit data w/ flat address.
773 * @note Zero extending the value to 64-bit to simplify assembly.
774 */
775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
776{
777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
778 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
779#else
780 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
781#endif
782}
783
784
785/**
786 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
787 * to 32 bits.
788 * @note Zero extending the value to 64-bit to simplify assembly.
789 */
790IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
791{
792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
793 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
794#else
795 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
796#endif
797}
798
799
800/**
801 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
802 * to 64 bits.
803 * @note Zero extending the value to 64-bit to simplify assembly.
804 */
805IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
806{
807#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
808 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
809#else
810 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
811#endif
812}
813
814
815/**
816 * Used by TB code to load unsigned 32-bit data w/ flat address.
817 * @note Zero extending the value to 64-bit to simplify assembly.
818 */
819IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
820{
821#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
822 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
823#else
824 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
825#endif
826}
827
828
829/**
830 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
831 * to 64 bits.
832 * @note Zero extending the value to 64-bit to simplify assembly.
833 */
834IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
835{
836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
837 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
838#else
839 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
840#endif
841}
842
843
844/**
845 * Used by TB code to load unsigned 64-bit data w/ flat address.
846 */
847IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
848{
849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
850 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
851#else
852 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
853#endif
854}
855
856
857#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
858/**
859 * Used by TB code to load unsigned 128-bit data w/ flat address.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
864 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
865#else
866 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
867#endif
868}
869
870
871/**
872 * Used by TB code to load unsigned 128-bit data w/ flat address.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
877 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
878#else
879 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
880#endif
881}
882
883
884/**
885 * Used by TB code to load unsigned 128-bit data w/ flat address.
886 */
887IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
888{
889#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
890 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
891#else
892 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
893#endif
894}
895
896
897/**
898 * Used by TB code to load unsigned 256-bit data w/ flat address.
899 */
900IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
901{
902#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
903 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
904#else
905 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
906#endif
907}
908
909
910/**
911 * Used by TB code to load unsigned 256-bit data w/ flat address.
912 */
913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
914{
915#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
916 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
917#else
918 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
919#endif
920}
921#endif
922
923
924/**
925 * Used by TB code to store unsigned 8-bit data w/ flat address.
926 */
927IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
928{
929#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
930 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
931#else
932 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
933#endif
934}
935
936
937/**
938 * Used by TB code to store unsigned 16-bit data w/ flat address.
939 */
940IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
941{
942#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
943 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
944#else
945 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
946#endif
947}
948
949
950/**
951 * Used by TB code to store unsigned 32-bit data w/ flat address.
952 */
953IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
954{
955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
956 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
957#else
958 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
959#endif
960}
961
962
963/**
964 * Used by TB code to store unsigned 64-bit data w/ flat address.
965 */
966IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
967{
968#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
969 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
970#else
971 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
972#endif
973}
974
975
976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
977/**
978 * Used by TB code to store unsigned 128-bit data w/ flat address.
979 */
980IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
981{
982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
983 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
984#else
985 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
986#endif
987}
988
989
990/**
991 * Used by TB code to store unsigned 128-bit data w/ flat address.
992 */
993IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
994{
995#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
996 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
997#else
998 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
999#endif
1000}
1001
1002
1003/**
1004 * Used by TB code to store unsigned 256-bit data w/ flat address.
1005 */
1006IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1007{
1008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1009 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1010#else
1011 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1012#endif
1013}
1014
1015
1016/**
1017 * Used by TB code to store unsigned 256-bit data w/ flat address.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1022 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1023#else
1024 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1025#endif
1026}
1027#endif
1028
1029
1030
1031/**
1032 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1033 */
1034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1035{
1036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1037 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1038#else
1039 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1040#endif
1041}
1042
1043
1044/**
1045 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1050 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1051#else
1052 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to store a segment selector value onto a flat stack.
1059 *
1060 * Intel CPUs don't write a whole dword, hence the special function.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1065 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1066#else
1067 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1074 */
1075IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1076{
1077#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1078 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1079#else
1080 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1081#endif
1082}
1083
1084
1085/**
1086 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1087 */
1088IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1089{
1090#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1091 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1092#else
1093 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1094#endif
1095}
1096
1097
1098/**
1099 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1100 */
1101IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1102{
1103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1104 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1105#else
1106 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1107#endif
1108}
1109
1110
1111/**
1112 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1113 */
1114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1115{
1116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1117 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1118#else
1119 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1120#endif
1121}
1122
1123
1124
1125/*********************************************************************************************************************************
1126* Helpers: Segmented memory mapping. *
1127*********************************************************************************************************************************/
1128
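/*
 * Editor's note (illustrative, not part of the original source): each mapping
 * helper returns a host pointer to the guest data and an opaque byte via
 * pbUnmapInfo, which the TB code later hands to a matching commit-and-unmap
 * helper (see the "Commit, rollback & unmap" section below).  A rough usage
 * sketch; the RW commit helper name is an assumption, as only the atomic
 * variant is visible in this excerpt:
 *
 *     uint8_t   bUnmapInfo = 0;
 *     uint32_t *pu32 = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_SS);
 *     *pu32 += 1;                                          // modify the mapped guest dword
 *     iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);  // hypothetical commit helper
 */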
1129/**
1130 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1131 * segmentation.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1134 RTGCPTR GCPtrMem, uint8_t iSegReg))
1135{
1136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1137 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1138#else
1139 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1140#endif
1141}
1142
1143
1144/**
1145 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1146 */
1147IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1148 RTGCPTR GCPtrMem, uint8_t iSegReg))
1149{
1150#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1151 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1152#else
1153 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1154#endif
1155}
1156
1157
1158/**
1159 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1160 */
1161IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1162 RTGCPTR GCPtrMem, uint8_t iSegReg))
1163{
1164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1165 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1166#else
1167 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1168#endif
1169}
1170
1171
1172/**
1173 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1174 */
1175IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1176 RTGCPTR GCPtrMem, uint8_t iSegReg))
1177{
1178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1179 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1180#else
1181 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1182#endif
1183}
1184
1185
1186/**
1187 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1188 * segmentation.
1189 */
1190IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1191 RTGCPTR GCPtrMem, uint8_t iSegReg))
1192{
1193#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1194 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1195#else
1196 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1197#endif
1198}
1199
1200
1201/**
1202 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1203 */
1204IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1205 RTGCPTR GCPtrMem, uint8_t iSegReg))
1206{
1207#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1208 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1209#else
1210 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1211#endif
1212}
1213
1214
1215/**
1216 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1217 */
1218IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1219 RTGCPTR GCPtrMem, uint8_t iSegReg))
1220{
1221#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1222 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1223#else
1224 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1225#endif
1226}
1227
1228
1229/**
1230 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1231 */
1232IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1233 RTGCPTR GCPtrMem, uint8_t iSegReg))
1234{
1235#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1236 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1237#else
1238 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1239#endif
1240}
1241
1242
1243/**
1244 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1245 * segmentation.
1246 */
1247IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1248 RTGCPTR GCPtrMem, uint8_t iSegReg))
1249{
1250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1251 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1252#else
1253 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1254#endif
1255}
1256
1257
1258/**
1259 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1260 */
1261IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1262 RTGCPTR GCPtrMem, uint8_t iSegReg))
1263{
1264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1265 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1266#else
1267 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1268#endif
1269}
1270
1271
1272/**
1273 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1274 */
1275IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1276 RTGCPTR GCPtrMem, uint8_t iSegReg))
1277{
1278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1279 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1280#else
1281 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1282#endif
1283}
1284
1285
1286/**
1287 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1288 */
1289IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1290 RTGCPTR GCPtrMem, uint8_t iSegReg))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1293 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1294#else
1295 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1296#endif
1297}
1298
1299
1300/**
1301 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1302 * segmentation.
1303 */
1304IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1305 RTGCPTR GCPtrMem, uint8_t iSegReg))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1308 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1309#else
1310 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1319 RTGCPTR GCPtrMem, uint8_t iSegReg))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1322 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1323#else
1324 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1331 */
1332IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1333 RTGCPTR GCPtrMem, uint8_t iSegReg))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1336 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1337#else
1338 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1347 RTGCPTR GCPtrMem, uint8_t iSegReg))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1350 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1351#else
1352 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1361 RTGCPTR GCPtrMem, uint8_t iSegReg))
1362{
1363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1364 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1365#else
1366 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1367#endif
1368}
1369
1370
1371/**
1372 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1373 */
1374IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1375 RTGCPTR GCPtrMem, uint8_t iSegReg))
1376{
1377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1378 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1379#else
1380 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1381#endif
1382}
1383
1384
1385/**
1386 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1387 * segmentation.
1388 */
1389IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1390 RTGCPTR GCPtrMem, uint8_t iSegReg))
1391{
1392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1393 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1394#else
1395 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1396#endif
1397}
1398
1399
1400/**
1401 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1402 */
1403IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1404 RTGCPTR GCPtrMem, uint8_t iSegReg))
1405{
1406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1407 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1408#else
1409 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#endif
1411}
1412
1413
1414/**
1415 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1416 */
1417IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1418 RTGCPTR GCPtrMem, uint8_t iSegReg))
1419{
1420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1421 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1422#else
1423 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#endif
1425}
1426
1427
1428/**
1429 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1430 */
1431IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1432 RTGCPTR GCPtrMem, uint8_t iSegReg))
1433{
1434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1435 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1436#else
1437 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#endif
1439}
1440
1441
1442/*********************************************************************************************************************************
1443* Helpers: Flat memory mapping. *
1444*********************************************************************************************************************************/
1445
1446/**
1447 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1448 * address.
1449 */
1450IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1451{
1452#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1453 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1454#else
1455 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1456#endif
1457}
1458
1459
1460/**
1461 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1467#else
1468 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1480#else
1481 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1490{
1491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1492 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1493#else
1494 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1495#endif
1496}
1497
1498
1499/**
1500 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1501 * address.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1504{
1505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1506 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1507#else
1508 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1509#endif
1510}
1511
1512
1513/**
1514 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1515 */
1516IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1517{
1518#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1519 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1520#else
1521 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1522#endif
1523}
1524
1525
1526/**
1527 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1528 */
1529IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1530{
1531#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1532 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1533#else
1534 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1535#endif
1536}
1537
1538
1539/**
1540 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1541 */
1542IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1543{
1544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1545 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1546#else
1547 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1548#endif
1549}
1550
1551
1552/**
1553 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1554 * address.
1555 */
1556IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1557{
1558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1559 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1560#else
1561 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1562#endif
1563}
1564
1565
1566/**
1567 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1568 */
1569IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1570{
1571#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1572 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1573#else
1574 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1575#endif
1576}
1577
1578
1579/**
1580 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1581 */
1582IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1583{
1584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1585 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1586#else
1587 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1588#endif
1589}
1590
1591
1592/**
1593 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1594 */
1595IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1596{
1597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1598 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1599#else
1600 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1607 * address.
1608 */
1609IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1610{
1611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1612 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1613#else
1614 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1615#endif
1616}
1617
1618
1619/**
1620 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1621 */
1622IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1623{
1624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1625 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1626#else
1627 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1628#endif
1629}
1630
1631
1632/**
1633 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1634 */
1635IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1636{
1637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1638 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1639#else
1640 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1641#endif
1642}
1643
1644
1645/**
1646 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1647 */
1648IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1649{
1650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1651 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1652#else
1653 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1665#else
1666 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1675{
1676#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1677 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1678#else
1679 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1680#endif
1681}
1682
1683
1684/**
1685 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1686 * address.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1689{
1690#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1691 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1692#else
1693 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1694#endif
1695}
1696
1697
1698/**
1699 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1704 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1705#else
1706 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1707#endif
1708}
1709
1710
1711/**
1712 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1713 */
1714IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1715{
1716#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1717 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1718#else
1719 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1720#endif
1721}
1722
1723
1724/**
1725 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1726 */
1727IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1728{
1729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1730 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1731#else
1732 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1733#endif
1734}
1735
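/*
 * Note: Each of the flat-map helpers above resolves to the segmented *SafeJmp*
 * worker when IEMNATIVE_WITH_TLB_LOOKUP_MAPPED is defined, presumably so the
 * recompiled code can do the TLB lookup inline and only call the helper on a
 * miss, and to the flat *Jmp* worker otherwise.  The pattern is identical for
 * all widths (U16/U32/U64/R80/D80/U128) and access kinds (Atomic/Rw/Wo/Ro).
 */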
1736
1737/*********************************************************************************************************************************
1738* Helpers: Commit, rollback & unmap *
1739*********************************************************************************************************************************/
1740
1741/**
1742 * Used by TB code to commit and unmap an atomic read-write memory mapping.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1745{
1746 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
1747}
1748
1749
1750/**
1751 * Used by TB code to commit and unmap a read-write memory mapping.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1754{
1755 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
1756}
1757
1758
1759/**
1760 * Used by TB code to commit and unmap a write-only memory mapping.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1763{
1764 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
1765}
1766
1767
1768/**
1769 * Used by TB code to commit and unmap a read-only memory mapping.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1772{
1773 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
1774}
1775
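/*
 * Illustrative only: roughly how recompiled TB code is expected to pair a map
 * helper with the matching commit-and-unmap helper for a read-modify-write
 * access.  This is a hand-written sketch; GCPtrEff and fBitsToSet are made-up
 * locals, the real sequences are produced by the emitters.
 *
 * @code
 *      uint8_t   bUnmapInfo;
 *      uint32_t *pu32Dst = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrEff);
 *      *pu32Dst |= fBitsToSet;                                // the actual operation
 *      iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);    // commit + unmap
 * @endcode
 */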
1776
1777/**
1778 * Reinitializes the native recompiler state.
1779 *
1780 * Called before starting a new recompile job.
1781 */
1782static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1783{
1784 pReNative->cLabels = 0;
1785 pReNative->bmLabelTypes = 0;
1786 pReNative->cFixups = 0;
1787#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1788 pReNative->pDbgInfo->cEntries = 0;
1789 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
1790#endif
1791 pReNative->pTbOrg = pTb;
1792 pReNative->cCondDepth = 0;
1793 pReNative->uCondSeqNo = 0;
1794 pReNative->uCheckIrqSeqNo = 0;
1795 pReNative->uTlbSeqNo = 0;
1796
1797#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1798 pReNative->Core.offPc = 0;
1799 pReNative->Core.cInstrPcUpdateSkipped = 0;
1800#endif
1801#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1802 pReNative->fSimdRaiseXcptChecksEmitted = 0;
1803#endif
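    /* Mark the fixed registers and any bits beyond IEMNATIVE_HST_GREG_COUNT as
       allocated up front so the register allocator never hands them out. */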
1804 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1805#if IEMNATIVE_HST_GREG_COUNT < 32
1806 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1807#endif
1808 ;
1809 pReNative->Core.bmHstRegsWithGstShadow = 0;
1810 pReNative->Core.bmGstRegShadows = 0;
1811#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1812 pReNative->Core.bmGstRegShadowDirty = 0;
1813#endif
1814 pReNative->Core.bmVars = 0;
1815 pReNative->Core.bmStack = 0;
1816 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1817 pReNative->Core.u64ArgVars = UINT64_MAX;
1818
1819 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
1820 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1821 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1822 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1823 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1824 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1825 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1826 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
1827 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
1828 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
1829 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
1830 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
1831 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
1832 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
1833 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
1834 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
1835 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
1836 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
1837
1838 /* Full host register reinit: */
1839 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1840 {
1841 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1842 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1843 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1844 }
1845
1846 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1847 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1848#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1849 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1850#endif
1851#ifdef IEMNATIVE_REG_FIXED_TMP0
1852 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1853#endif
1854#ifdef IEMNATIVE_REG_FIXED_TMP1
1855 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
1856#endif
1857#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1858 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
1859#endif
1860 );
1861 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1862 {
1863 fRegs &= ~RT_BIT_32(idxReg);
1864 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1865 }
1866
1867 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1868#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1869 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1870#endif
1871#ifdef IEMNATIVE_REG_FIXED_TMP0
1872 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1873#endif
1874#ifdef IEMNATIVE_REG_FIXED_TMP1
1875 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
1876#endif
1877#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1878 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
1879#endif
1880
1881#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1882 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
1883# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
1884 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
1885# endif
1886 ;
1887 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
1888 pReNative->Core.bmGstSimdRegShadows = 0;
1889 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
1890 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
1891
1892 /* Full host register reinit: */
1893 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
1894 {
1895 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
1896 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
1897 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
1898 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
1899 }
1900
1901 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
1902 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1903 {
1904 fRegs &= ~RT_BIT_32(idxReg);
1905 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1906 }
1907
1908#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
1909 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1910#endif
1911
1912#endif
1913
1914 return pReNative;
1915}
1916
1917
1918/**
1919 * Allocates and initializes the native recompiler state.
1920 *
1921 * This is called the first time an EMT wants to recompile something.
1922 *
1923 * @returns Pointer to the new recompiler state.
1924 * @param pVCpu The cross context virtual CPU structure of the calling
1925 * thread.
1926 * @param pTb The TB that's about to be recompiled.
1927 * @thread EMT(pVCpu)
1928 */
1929static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1930{
1931 VMCPU_ASSERT_EMT(pVCpu);
1932
1933 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1934 AssertReturn(pReNative, NULL);
1935
1936 /*
1937 * Try allocate all the buffers and stuff we need.
1938 */
1939 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1940 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1941 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1942#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1943 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1944#endif
1945 if (RT_LIKELY( pReNative->pInstrBuf
1946 && pReNative->paLabels
1947 && pReNative->paFixups)
1948#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1949 && pReNative->pDbgInfo
1950#endif
1951 )
1952 {
1953 /*
1954 * Set the buffer & array sizes on success.
1955 */
1956 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1957 pReNative->cLabelsAlloc = _8K;
1958 pReNative->cFixupsAlloc = _16K;
1959#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1960 pReNative->cDbgInfoAlloc = _16K;
1961#endif
1962
1963 /* Other constant stuff: */
1964 pReNative->pVCpu = pVCpu;
1965
1966 /*
1967 * Done, just need to save it and reinit it.
1968 */
1969 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1970 return iemNativeReInit(pReNative, pTb);
1971 }
1972
1973 /*
1974 * Failed. Cleanup and return.
1975 */
1976 AssertFailed();
1977 RTMemFree(pReNative->pInstrBuf);
1978 RTMemFree(pReNative->paLabels);
1979 RTMemFree(pReNative->paFixups);
1980#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1981 RTMemFree(pReNative->pDbgInfo);
1982#endif
1983 RTMemFree(pReNative);
1984 return NULL;
1985}
1986
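/*
 * Illustrative only: the expected calling pattern at the start of a recompile
 * job is to reuse the per-EMT state when it already exists and only allocate
 * it the first time round.  A sketch, not a quote of the actual caller:
 *
 * @code
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (RT_LIKELY(pReNative))
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *          pReNative = iemNativeInit(pVCpu, pTb);  // NULL here means out of memory
 * @endcode
 */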
1987
1988/**
1989 * Creates a label.
1990 *
1991 * If the label does not yet have a defined position,
1992 * call iemNativeLabelDefine() later to set it.
1993 *
1994 * @returns Label ID. Throws VBox status code on failure, so no need to check
1995 * the return value.
1996 * @param pReNative The native recompile state.
1997 * @param enmType The label type.
1998 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1999 * label is not yet defined (default).
2000 * @param uData Data associated with the label. Only applicable to
2001 * certain types of labels. Default is zero.
2002 */
2003DECL_HIDDEN_THROW(uint32_t)
2004iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2005 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2006{
2007 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2008
2009 /*
2010 * Locate existing label definition.
2011 *
2012 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2013 * and uData is zero.
2014 */
2015 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2016 uint32_t const cLabels = pReNative->cLabels;
2017 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2018#ifndef VBOX_STRICT
2019 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2020 && offWhere == UINT32_MAX
2021 && uData == 0
2022#endif
2023 )
2024 {
2025#ifndef VBOX_STRICT
2026 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2027 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2028 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2029 if (idxLabel < pReNative->cLabels)
2030 return idxLabel;
2031#else
2032 for (uint32_t i = 0; i < cLabels; i++)
2033 if ( paLabels[i].enmType == enmType
2034 && paLabels[i].uData == uData)
2035 {
2036 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2037 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2038 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2039 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2040 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2041 return i;
2042 }
2043 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2044 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2045#endif
2046 }
2047
2048 /*
2049 * Make sure we've got room for another label.
2050 */
2051 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2052 { /* likely */ }
2053 else
2054 {
2055 uint32_t cNew = pReNative->cLabelsAlloc;
2056 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2057 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2058 cNew *= 2;
2059 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2060 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2061 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2062 pReNative->paLabels = paLabels;
2063 pReNative->cLabelsAlloc = cNew;
2064 }
2065
2066 /*
2067 * Define a new label.
2068 */
2069 paLabels[cLabels].off = offWhere;
2070 paLabels[cLabels].enmType = enmType;
2071 paLabels[cLabels].uData = uData;
2072 pReNative->cLabels = cLabels + 1;
2073
2074 Assert((unsigned)enmType < 64);
2075 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2076
2077 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2078 {
2079 Assert(uData == 0);
2080 pReNative->aidxUniqueLabels[enmType] = cLabels;
2081 }
2082
2083 if (offWhere != UINT32_MAX)
2084 {
2085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2086 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2087 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2088#endif
2089 }
2090 return cLabels;
2091}
2092
2093
2094/**
2095 * Defines the location of an existing label.
2096 *
2097 * @param pReNative The native recompile state.
2098 * @param idxLabel The label to define.
2099 * @param offWhere The position.
2100 */
2101DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2102{
2103 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2104 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2105 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2106 pLabel->off = offWhere;
2107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2108 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2109 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2110#endif
2111}
2112
2113
2114/**
2115 * Looks up a label.
2116 *
2117 * @returns Label ID if found, UINT32_MAX if not.
2118 */
2119static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2120 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2121{
2122 Assert((unsigned)enmType < 64);
2123 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2124 {
2125 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2126 return pReNative->aidxUniqueLabels[enmType];
2127
2128 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2129 uint32_t const cLabels = pReNative->cLabels;
2130 for (uint32_t i = 0; i < cLabels; i++)
2131 if ( paLabels[i].enmType == enmType
2132 && paLabels[i].uData == uData
2133 && ( paLabels[i].off == offWhere
2134 || offWhere == UINT32_MAX
2135 || paLabels[i].off == UINT32_MAX))
2136 return i;
2137 }
2138 return UINT32_MAX;
2139}
2140
2141
2142/**
2143 * Adds a fixup.
2144 *
2145 * @throws VBox status code (int) on failure.
2146 * @param pReNative The native recompile state.
2147 * @param offWhere The instruction offset of the fixup location.
2148 * @param idxLabel The target label ID for the fixup.
2149 * @param enmType The fixup type.
2150 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2151 */
2152DECL_HIDDEN_THROW(void)
2153iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2154 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2155{
2156 Assert(idxLabel <= UINT16_MAX);
2157 Assert((unsigned)enmType <= UINT8_MAX);
2158#ifdef RT_ARCH_ARM64
2159 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2160 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2161 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2162#endif
2163
2164 /*
2165 * Make sure we've room.
2166 */
2167 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2168 uint32_t const cFixups = pReNative->cFixups;
2169 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2170 { /* likely */ }
2171 else
2172 {
2173 uint32_t cNew = pReNative->cFixupsAlloc;
2174 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2175 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2176 cNew *= 2;
2177 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2178 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2179 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2180 pReNative->paFixups = paFixups;
2181 pReNative->cFixupsAlloc = cNew;
2182 }
2183
2184 /*
2185 * Add the fixup.
2186 */
2187 paFixups[cFixups].off = offWhere;
2188 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2189 paFixups[cFixups].enmType = enmType;
2190 paFixups[cFixups].offAddend = offAddend;
2191 pReNative->cFixups = cFixups + 1;
2192}
2193
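/*
 * Illustrative only: the typical forward-branch pattern built on the label and
 * fixup APIs above - create the label without a position, emit the branch
 * (which records a fixup), then define the label once the target offset is
 * known.  The emitter name and enmLabelType are placeholders for this sketch;
 * the real emitters live in IEMN8veRecompilerEmit.h.
 *
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);  // adds a fixup for the unknown target
 *      // ... emit the code that is skipped ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);           // resolves the forward reference
 * @endcode
 */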
2194
2195/**
2196 * Slow code path for iemNativeInstrBufEnsure.
2197 */
2198DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2199{
2200 /* Double the buffer size till we meet the request. */
2201 uint32_t cNew = pReNative->cInstrBufAlloc;
2202 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2203 do
2204 cNew *= 2;
2205 while (cNew < off + cInstrReq);
2206
2207 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2208#ifdef RT_ARCH_ARM64
2209 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2210#else
2211 uint32_t const cbMaxInstrBuf = _2M;
2212#endif
2213 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2214
2215 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2216 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2217
2218#ifdef VBOX_STRICT
2219 pReNative->offInstrBufChecked = off + cInstrReq;
2220#endif
2221 pReNative->cInstrBufAlloc = cNew;
2222 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2223}
2224
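/*
 * Illustrative only: emitters are expected to reserve space via the inline
 * iemNativeInstrBufEnsure() wrapper in IEMN8veRecompiler.h, which only drops
 * into the slow path above when the buffer is too small.  Rough usage sketch
 * (AMD64 flavour, where an instruction unit is a byte):
 *
 * @code
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
 *      pCodeBuf[off++] = 0x90;   // nop - just to show the shape
 * @endcode
 */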
2225#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2226
2227/**
2228 * Grows the static debug info array used during recompilation.
2229 *
2230 * @returns Pointer to the new debug info block; throws VBox status code on
2231 * failure, so no need to check the return value.
2232 */
2233DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2234{
2235 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2236 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2237 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2238 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2239 pReNative->pDbgInfo = pDbgInfo;
2240 pReNative->cDbgInfoAlloc = cNew;
2241 return pDbgInfo;
2242}
2243
2244
2245/**
2246 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2247 */
2248DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2249{
2250 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2251 { /* likely */ }
2252 else
2253 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2254 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2255}
2256
2257
2258/**
2259 * Debug Info: Adds a native offset record, if necessary.
2260 */
2261DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2262{
2263 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2264
2265 /*
2266 * Do we need this one?
2267 */
2268 uint32_t const offPrev = pDbgInfo->offNativeLast;
2269 if (offPrev == off)
2270 return;
2271 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2272
2273 /*
2274 * Add it.
2275 */
2276 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2277 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2278 pEntry->NativeOffset.offNative = off;
2279 pDbgInfo->offNativeLast = off;
2280}
2281
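/*
 * Note: The other debug info records added below carry no native offset of
 * their own; they are taken to belong to the most recent NativeOffset record,
 * which is why callers (e.g. iemNativeLabelDefine) add an offset record first.
 */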
2282
2283/**
2284 * Debug Info: Record info about a label.
2285 */
2286static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2287{
2288 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2289 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2290 pEntry->Label.uUnused = 0;
2291 pEntry->Label.enmLabel = (uint8_t)enmType;
2292 pEntry->Label.uData = uData;
2293}
2294
2295
2296/**
2297 * Debug Info: Record info about a threaded call.
2298 */
2299static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2300{
2301 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2302 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2303 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2304 pEntry->ThreadedCall.uUnused = 0;
2305 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2306}
2307
2308
2309/**
2310 * Debug Info: Record info about a new guest instruction.
2311 */
2312static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2313{
2314 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2315 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2316 pEntry->GuestInstruction.uUnused = 0;
2317 pEntry->GuestInstruction.fExec = fExec;
2318}
2319
2320
2321/**
2322 * Debug Info: Record info about guest register shadowing.
2323 */
2324DECL_HIDDEN_THROW(void)
2325iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2326 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2327{
2328 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2329 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2330 pEntry->GuestRegShadowing.uUnused = 0;
2331 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2332 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2333 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2334#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2335 Assert( idxHstReg != UINT8_MAX
2336 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2337#endif
2338}
2339
2340
2341# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2342/**
2343 * Debug Info: Record info about guest SIMD register shadowing.
2344 */
2345DECL_HIDDEN_THROW(void)
2346iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2347 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2348{
2349 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2350 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2351 pEntry->GuestSimdRegShadowing.uUnused = 0;
2352 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2353 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2354 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2355}
2356# endif
2357
2358
2359# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2360/**
2361 * Debug Info: Record info about delayed RIP updates.
2362 */
2363DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2364{
2365 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2366 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2367 pEntry->DelayedPcUpdate.offPc = offPc;
2368 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2369}
2370# endif
2371
2372# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2373
2374/**
2375 * Debug Info: Record info about a dirty guest register.
2376 */
2377DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2378 uint8_t idxGstReg, uint8_t idxHstReg)
2379{
2380 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2381 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2382 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2383 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2384 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2385}
2386
2387
2388/**
2389 * Debug Info: Record info about a dirty guest register writeback operation.
2390 */
2391DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2392{
2393 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2394 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2395 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2396 pEntry->GuestRegWriteback.fGstReg = (uint32_t)fGstReg;
2397 /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */
2398 Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);
2399}
2400
2401# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2402
2403#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2404
2405
2406/*********************************************************************************************************************************
2407* Register Allocator *
2408*********************************************************************************************************************************/
2409
2410/**
2411 * Register parameter indexes (indexed by argument number).
2412 */
2413DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2414{
2415 IEMNATIVE_CALL_ARG0_GREG,
2416 IEMNATIVE_CALL_ARG1_GREG,
2417 IEMNATIVE_CALL_ARG2_GREG,
2418 IEMNATIVE_CALL_ARG3_GREG,
2419#if defined(IEMNATIVE_CALL_ARG4_GREG)
2420 IEMNATIVE_CALL_ARG4_GREG,
2421# if defined(IEMNATIVE_CALL_ARG5_GREG)
2422 IEMNATIVE_CALL_ARG5_GREG,
2423# if defined(IEMNATIVE_CALL_ARG6_GREG)
2424 IEMNATIVE_CALL_ARG6_GREG,
2425# if defined(IEMNATIVE_CALL_ARG7_GREG)
2426 IEMNATIVE_CALL_ARG7_GREG,
2427# endif
2428# endif
2429# endif
2430#endif
2431};
2432AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2433
2434/**
2435 * Call register masks indexed by argument count.
2436 */
2437DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2438{
2439 0,
2440 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2441 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2442 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2443 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2444 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2445#if defined(IEMNATIVE_CALL_ARG4_GREG)
2446 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2447 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2448# if defined(IEMNATIVE_CALL_ARG5_GREG)
2449 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2450 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2451# if defined(IEMNATIVE_CALL_ARG6_GREG)
2452 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2453 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2454 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2455# if defined(IEMNATIVE_CALL_ARG7_GREG)
2456 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2457 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2458 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2459# endif
2460# endif
2461# endif
2462#endif
2463};
2464
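/*
 * Illustrative only: how the two tables above are meant to be consumed when
 * placing arguments for a helper call - the first IEMNATIVE_CALL_ARG_GREG_COUNT
 * arguments go into the registers from g_aidxIemNativeCallRegs, the rest onto
 * the stack (hosts with IEMNATIVE_FP_OFF_STACK_ARG0 only), while
 * g_afIemNativeCallRegs gives the register mask to flush/reserve for a given
 * argument count.  The emitter names below are assumptions for this sketch.
 *
 * @code
 *      if (iArg < IEMNATIVE_CALL_ARG_GREG_COUNT)
 *          off = iemNativeEmitLoadGprImm64(pReNative, off, g_aidxIemNativeCallRegs[iArg], uArgValue);
 *      else
 *          off = iemNativeEmitStoreImm64ByBp(pReNative, off,
 *                                            g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT],
 *                                            uArgValue);
 * @endcode
 */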
2465#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2466/**
2467 * BP offset of the stack argument slots.
2468 *
2469 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2470 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2471 */
2472DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2473{
2474 IEMNATIVE_FP_OFF_STACK_ARG0,
2475# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2476 IEMNATIVE_FP_OFF_STACK_ARG1,
2477# endif
2478# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2479 IEMNATIVE_FP_OFF_STACK_ARG2,
2480# endif
2481# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2482 IEMNATIVE_FP_OFF_STACK_ARG3,
2483# endif
2484};
2485AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2486#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2487
2488/**
2489 * Info about shadowed guest register values.
2490 * @see IEMNATIVEGSTREG
2491 */
2492DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2493{
2494#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2495 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2496 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2497 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2498 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2499 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2500 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2501 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2502 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2503 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2504 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2505 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2506 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2507 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2508 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2509 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2510 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2511 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2512 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2513 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2514 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2515 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2516 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2517 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2518 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2519 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2520 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2521 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2522 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2523 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2524 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2525 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2526 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2527 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2528 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2529 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2530 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2531 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2532 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2533 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2534 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2535 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2536 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2537 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2538 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2539 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2540 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2541 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2542 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2543#undef CPUMCTX_OFF_AND_SIZE
2544};
2545AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2546
2547
2548/** Host CPU general purpose register names. */
2549DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2550{
2551#ifdef RT_ARCH_AMD64
2552 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2553#elif defined(RT_ARCH_ARM64)
2554 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2555 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2556#else
2557# error "port me"
2558#endif
2559};
2560
2561
2562#if 0 /* unused */
2563/**
2564 * Tries to locate a suitable register in the given register mask.
2565 *
2566 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2567 * failed.
2568 *
2569 * @returns Host register number on success, returns UINT8_MAX on failure.
2570 */
2571static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2572{
2573 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2574 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2575 if (fRegs)
2576 {
2577 /** @todo pick better here: */
2578 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2579
2580 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2581 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2582 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2583 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2584
2585 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2586 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2587 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2588 return idxReg;
2589 }
2590 return UINT8_MAX;
2591}
2592#endif /* unused */
2593
2594
2595#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2596/**
2597 * Stores the host reg @a idxHstReg into the CPUMCTX copy of the guest register @a enmGstReg.
2598 *
2599 * @returns New code buffer offset; throws VBox status code on failure.
2600 * @param pReNative The native recompile state.
2601 * @param off The current code buffer position.
2602 * @param enmGstReg The guest register to store to.
2603 * @param idxHstReg The host register to store from.
2604 */
2605DECL_FORCE_INLINE_THROW(uint32_t)
2606iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2607{
2608 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2609 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2610
2611 switch (g_aGstShadowInfo[enmGstReg].cb)
2612 {
2613 case sizeof(uint64_t):
2614 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2615 case sizeof(uint32_t):
2616 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2617 case sizeof(uint16_t):
2618 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2619#if 0 /* not present in the table. */
2620 case sizeof(uint8_t):
2621 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2622#endif
2623 default:
2624 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2625 }
2626}
2627
2628
2629/**
2630 * Emits code to flush a pending write of the given guest register if any.
2631 *
2632 * @returns New code buffer offset.
2633 * @param pReNative The native recompile state.
2634 * @param off Current code buffer position.
2635 * @param enmGstReg The guest register to flush.
2636 */
2637DECL_HIDDEN_THROW(uint32_t)
2638iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2639{
2640 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2641
2642 Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);
2643 Assert( idxHstReg != UINT8_MAX
2644 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2645 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2646 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2647
2648 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2649
2650 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2651 return off;
2652}
2653
2654
2655/**
2656 * Flush the given set of guest registers if marked as dirty.
2657 *
2658 * @returns New code buffer offset.
2659 * @param pReNative The native recompile state.
2660 * @param off Current code buffer position.
2661 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2662 */
2663DECL_HIDDEN_THROW(uint32_t)
2664iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2665{
2666 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2667 if (bmGstRegShadowDirty)
2668 {
2669# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2670 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2671 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2672# endif
2673 do
2674 {
2675 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2676 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2677 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2678 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2679 } while (bmGstRegShadowDirty);
2680 }
2681
2682 return off;
2683}
2684
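/*
 * Illustrative only: with delayed writeback enabled, code that is about to
 * hand control to something reading CPUMCTX directly (a helper call, a TB
 * exit) flushes the dirty shadow copies first, either wholesale or for a
 * specific subset:
 *
 * @code
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off);      // flush everything dirty
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off,       // ... or just RSP
 *                                        RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP));
 * @endcode
 */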
2685
2686/**
2687 * Flush all shadowed guest registers marked as dirty for the given host register.
2688 *
2689 * @returns New code buffer offset.
2690 * @param pReNative The native recompile state.
2691 * @param off Current code buffer position.
2692 * @param idxHstReg The host register.
2693 *
2694 * @note This doesn't do any unshadowing of guest registers from the host register.
2695 */
2696DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
2697{
2698 /* We need to flush any pending guest register writes this host register shadows. */
2699 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2700 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
2701 {
2702# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2703 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2704 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
2705# endif
2706 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
2707 * likely to only have a single bit set. It'll be in the 0..15 range,
2708 * but still it's 15 unnecessary loops for the last guest register. */
2709
2710 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
2711 do
2712 {
2713 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2714 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2715 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2716 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2717 } while (bmGstRegShadowDirty);
2718 }
2719
2720 return off;
2721}
2722#endif
2723
2724
2725/**
2726 * Locate a register, possibly freeing one up.
2727 *
2728 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2729 * failed.
2730 *
2731 * @returns Host register number on success. Returns UINT8_MAX if no register
2732 * was found; the caller is supposed to deal with this and raise an
2733 * allocation type specific status code (if desired).
2734 *
2735 * @throws VBox status code if we run into trouble spilling a variable or
2736 * recording debug info. Does NOT throw anything if we're out of
2737 * registers, though.
2738 */
2739static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2740 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2741{
2742 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
2743 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2744 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2745
2746 /*
2747 * Try a freed register that's shadowing a guest register.
2748 */
2749 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2750 if (fRegs)
2751 {
2752 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
2753
2754#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
2755 /*
2756 * When we have liveness information, we use it to kick out all shadowed
2757 * guest registers that will not be needed any more in this TB. If we're
2758 * lucky, this may prevent us from ending up here again.
2759 *
2760 * Note! We must consider the previous entry here so we don't free
2761 * anything that the current threaded function requires (current
2762 * entry is produced by the next threaded function).
2763 */
2764 uint32_t const idxCurCall = pReNative->idxCurCall;
2765 if (idxCurCall > 0)
2766 {
2767 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
2768
2769# ifndef IEMLIVENESS_EXTENDED_LAYOUT
2770 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
2771 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
2772 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
2773#else
2774 /* Construct a mask of the registers not in the read or write state.
2775 Note! We could skip writes, if they aren't from us, as this is just
2776 a hack to prevent trashing registers that have just been written
2777 or will be written when we retire the current instruction. */
2778 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
2779 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
2780 & IEMLIVENESSBIT_MASK;
2781#endif
2782 /* Merge EFLAGS: the liveness layout tracks the flags as separate group bits at and above the kIemNativeGstReg_EFlags position; AND them all down into that single bit so EFLAGS is only freed when every group is unused. */
2783 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
2784 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
2785 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
2786 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
2787 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
2788
2789 /* If it matches any shadowed registers. */
2790 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
2791 {
2792#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2793 /* Writeback any dirty shadow registers we are about to unshadow. */
2794 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
2795#endif
2796
2797 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
2798 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
2799 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
2800
2801 /* See if we've got any unshadowed registers we can return now. */
2802 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
2803 if (fUnshadowedRegs)
2804 {
2805 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
2806 return (fPreferVolatile
2807 ? ASMBitFirstSetU32(fUnshadowedRegs)
2808 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2809 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
2810 - 1;
2811 }
2812 }
2813 }
2814#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
2815
2816 unsigned const idxReg = (fPreferVolatile
2817 ? ASMBitFirstSetU32(fRegs)
2818 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2819 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
2820 - 1;
2821
2822 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2823 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2824 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2825 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2826
2827#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2828 /* We need to flush any pending guest register writes this host register shadows. */
2829 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
2830#endif
2831
2832 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2833 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2834 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2835 return idxReg;
2836 }
2837
2838 /*
2839 * Try free up a variable that's in a register.
2840 *
2841 * We do two rounds here, first evacuating variables we don't need to be
2842 * saved on the stack, then in the second round move things to the stack.
2843 */
2844 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
2845 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2846 {
2847 uint32_t fVars = pReNative->Core.bmVars;
2848 while (fVars)
2849 {
2850 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2851 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2852#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2853 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
2854 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit before continuing or we would loop on this variable forever. */
2855#endif
2856
2857 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2858 && (RT_BIT_32(idxReg) & fRegMask)
2859 && ( iLoop == 0
2860 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2861 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2862 && !pReNative->Core.aVars[idxVar].fRegAcquired)
2863 {
2864 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2865 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2866 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2867 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2868 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2869 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2870#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2871 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2872#endif
2873
2874 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2875 {
2876 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
2877 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2878 }
2879
2880 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2881 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2882
2883 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2884 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2885 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2886 return idxReg;
2887 }
2888 fVars &= ~RT_BIT_32(idxVar);
2889 }
2890 }
2891
2892 return UINT8_MAX;
2893}
2894
2895
2896/**
2897 * Reassigns a variable to a different register specified by the caller.
2898 *
2899 * @returns The new code buffer position.
2900 * @param pReNative The native recompile state.
2901 * @param off The current code buffer position.
2902 * @param idxVar The variable index.
2903 * @param idxRegOld The old host register number.
2904 * @param idxRegNew The new host register number.
2905 * @param pszCaller The caller for logging.
2906 */
2907static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2908 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2909{
2910 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2911 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
2912#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2913 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
2914#endif
2915 RT_NOREF(pszCaller);
2916
2917#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2918 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2919#endif
2920 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2921
2922 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2923#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2924 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2925#endif
2926 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
2927 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2928 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2929
2930 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2931 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2932 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2933 if (fGstRegShadows)
2934 {
2935 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
2936 | RT_BIT_32(idxRegNew);
2937 while (fGstRegShadows)
2938 {
2939 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2940 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2941
2942 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2943 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2944 }
2945 }
2946
2947 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
2948 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2949 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2950 return off;
2951}
2952
2953
2954/**
2955 * Moves a variable to a different register or spills it onto the stack.
2956 *
2957 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2958 * kinds can easily be recreated if needed later.
2959 *
2960 * @returns The new code buffer position.
2961 * @param pReNative The native recompile state.
2962 * @param off The current code buffer position.
2963 * @param idxVar The variable index.
2964 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2965 * call-volatile registers.
2966 */
2967DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2968 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
2969{
2970 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2971 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2972 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
2973 Assert(!pVar->fRegAcquired);
2974
2975 uint8_t const idxRegOld = pVar->idxReg;
2976 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2977 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2978 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2979 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2980 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2981 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2982 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2983 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2984#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2985 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2986#endif
2987
2988
2989 /** @todo Add statistics on this.*/
2990 /** @todo Implement basic variable liveness analysis (python) so variables
2991 * can be freed immediately once no longer used. Without it we risk trashing
2992 * registers and stack slots on dead variables.
2993 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
2994
2995 /*
2996 * First try move it to a different register, as that's cheaper.
2997 */
2998 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2999 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3000 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3001 if (fRegs)
3002 {
3003 /* Avoid using shadow registers, if possible. */
3004 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3005 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3006 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3007 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3008 }
3009
3010 /*
3011 * Otherwise we must spill the register onto the stack.
3012 */
3013 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3014 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3015 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3016 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3017
3018 pVar->idxReg = UINT8_MAX;
3019 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3020 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3021 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3022 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3023 return off;
3024}
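
/*
 * Illustrative sketch (added for exposition, not part of the original sources):
 * this mirrors what the call-preparation code further down in this file does
 * for each stack variable that is sitting in a call-volatile register.
 *
 *      if (pVar->enmKind != kIemNativeVarKind_Stack)
 *          pVar->idxReg = UINT8_MAX;                                       // cheap: just forget the register copy
 *      else
 *          off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);  // move to a safe register or spill
 */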
3025
3026
3027/**
3028 * Allocates a temporary host general purpose register.
3029 *
3030 * This may emit code to save register content onto the stack in order to free
3031 * up a register.
3032 *
3033 * @returns The host register number; throws VBox status code on failure,
3034 * so no need to check the return value.
3035 * @param pReNative The native recompile state.
3036 * @param poff Pointer to the variable with the code buffer position.
3037 * This will be updated if we need to move a variable from
3038 * register to stack in order to satisfy the request.
3039 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3040 * registers (@c true, default) or the other way around
3041 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3042 */
3043DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3044{
3045 /*
3046 * Try find a completely unused register, preferably a call-volatile one.
3047 */
3048 uint8_t idxReg;
3049 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3050 & ~pReNative->Core.bmHstRegsWithGstShadow
3051 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3052 if (fRegs)
3053 {
3054 if (fPreferVolatile)
3055 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3056 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3057 else
3058 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3059 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3060 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3061 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3062 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3063 }
3064 else
3065 {
3066 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3067 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3068 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3069 }
3070 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3071}
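
/*
 * Illustrative usage sketch (added for exposition, not part of the original
 * sources). Typical pattern in an emitter helper: allocate a scratch GPR,
 * emit code that uses it, then hand it back. All calls below appear
 * elsewhere in this file; the immediate value is made up.
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, UINT64_C(0x1000));
 *      // ... emit instructions that use/clobber idxRegTmp ...
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */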
3072
3073
3074/**
3075 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3076 * registers.
3077 *
3078 * @returns The host register number; throws VBox status code on failure,
3079 * so no need to check the return value.
3080 * @param pReNative The native recompile state.
3081 * @param poff Pointer to the variable with the code buffer position.
3082 * This will be updated if we need to move a variable from
3083 * register to stack in order to satisfy the request.
3084 * @param fRegMask Mask of acceptable registers.
3085 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3086 * registers (@c true, default) or the other way around
3087 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3088 */
3089DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3090 bool fPreferVolatile /*= true*/)
3091{
3092 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3093 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3094
3095 /*
3096 * Try find a completely unused register, preferably a call-volatile one.
3097 */
3098 uint8_t idxReg;
3099 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3100 & ~pReNative->Core.bmHstRegsWithGstShadow
3101 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3102 & fRegMask;
3103 if (fRegs)
3104 {
3105 if (fPreferVolatile)
3106 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3107 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3108 else
3109 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3110 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3111 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3112 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3113 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3114 }
3115 else
3116 {
3117 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3118 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3119 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3120 }
3121 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3122}
3123
3124
3125/**
3126 * Allocates a temporary register for loading an immediate value into.
3127 *
3128 * This will emit code to load the immediate, unless there happens to be an
3129 * unused register with the value already loaded.
3130 *
3131 * The caller must not modify the returned register; it must be considered
3132 * read-only. Free using iemNativeRegFreeTmpImm.
3133 *
3134 * @returns The host register number; throws VBox status code on failure, so no
3135 * need to check the return value.
3136 * @param pReNative The native recompile state.
3137 * @param poff Pointer to the variable with the code buffer position.
3138 * @param uImm The immediate value that the register must hold upon
3139 * return.
3140 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3141 * registers (@c true, default) or the other way around
3142 * (@c false).
3143 *
3144 * @note Reusing immediate values has not been implemented yet.
3145 */
3146DECL_HIDDEN_THROW(uint8_t)
3147iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3148{
3149 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3150 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3151 return idxReg;
3152}
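
/*
 * Illustrative usage sketch (added for exposition, not part of the original
 * sources). The register returned by iemNativeRegAllocTmpImm must be treated
 * as read-only and released with iemNativeRegFreeTmpImm (defined below); the
 * uLimit value and the comparison are hypothetical.
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uLimit);
 *      // ... emit a compare (or similar) that only reads idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */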
3153
3154
3155/**
3156 * Allocates a temporary host general purpose register for keeping a guest
3157 * register value.
3158 *
3159 * If we already have a register holding the guest register value, it will be
3160 * reused; otherwise code will be emitted to load the value. Code may also
3161 * be emitted if we have to free up a register to satisfy the request.
3162 *
3163 * @returns The host register number; throws VBox status code on failure, so no
3164 * need to check the return value.
3165 * @param pReNative The native recompile state.
3166 * @param poff Pointer to the variable with the code buffer
3167 * position. This will be updated if we need to move a
3168 * variable from register to stack in order to satisfy
3169 * the request.
3170 * @param enmGstReg The guest register that is to be updated.
3171 * @param enmIntendedUse How the caller will be using the host register.
3172 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3173 * register is okay (default). The ASSUMPTION here is
3174 * that the caller has already flushed all volatile
3175 * registers, so this is only applied if we allocate a
3176 * new register.
3177 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3178 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3179 */
3180DECL_HIDDEN_THROW(uint8_t)
3181iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3182 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3183 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3184{
3185 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3186#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3187 AssertMsg( fSkipLivenessAssert
3188 || pReNative->idxCurCall == 0
3189 || enmGstReg == kIemNativeGstReg_Pc
3190 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3191 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3192 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3193 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3194 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3195 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3196#endif
3197 RT_NOREF(fSkipLivenessAssert);
3198#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3199 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3200#endif
3201 uint32_t const fRegMask = !fNoVolatileRegs
3202 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3203 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3204
3205 /*
3206 * First check if the guest register value is already in a host register.
3207 */
3208 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3209 {
3210 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3211 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3212 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3213 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3214
3215 /* It's not supposed to be allocated... */
3216 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3217 {
3218 /*
3219 * If the register will trash the guest shadow copy, try find a
3220 * completely unused register we can use instead. If that fails,
3221 * we need to disassociate the host reg from the guest reg.
3222 */
3223 /** @todo would be nice to know if preserving the register is in any way helpful. */
3224 /* If the purpose is calculations, try duplicate the register value as
3225 we'll be clobbering the shadow. */
3226 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3227 && ( ~pReNative->Core.bmHstRegs
3228 & ~pReNative->Core.bmHstRegsWithGstShadow
3229 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3230 {
3231 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3232
3233 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3234
3235 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3236 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3237 g_apszIemNativeHstRegNames[idxRegNew]));
3238 idxReg = idxRegNew;
3239 }
3240 /* If the current register matches the restrictions, go ahead and allocate
3241 it for the caller. */
3242 else if (fRegMask & RT_BIT_32(idxReg))
3243 {
3244 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3245 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3246 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3247 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3248 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3249 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3250 else
3251 {
3252 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3253 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3254 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3255 }
3256 }
3257 /* Otherwise, allocate a register that satisfies the caller and transfer
3258 the shadowing if compatible with the intended use. (This basically
3259 means the call wants a non-volatile register (RSP push/pop scenario).) */
3260 else
3261 {
3262 Assert(fNoVolatileRegs);
3263 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3264 !fNoVolatileRegs
3265 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3266 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3267 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3268 {
3269 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3270 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3271 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3272 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3273 }
3274 else
3275 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3276 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3277 g_apszIemNativeHstRegNames[idxRegNew]));
3278 idxReg = idxRegNew;
3279 }
3280 }
3281 else
3282 {
3283 /*
3284 * Oops. Shadowed guest register already allocated!
3285 *
3286 * Allocate a new register, copy the value and, if updating, the
3287 * guest shadow copy assignment to the new register.
3288 */
3289 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3290 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3291 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3292 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3293
3294 /** @todo share register for readonly access. */
3295 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3296 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3297
3298 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3299 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3300
3301 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3302 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3303 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3304 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3305 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3306 else
3307 {
3308 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3309 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3310 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3311 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3312 }
3313 idxReg = idxRegNew;
3314 }
3315 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3316
3317#ifdef VBOX_STRICT
3318 /* Strict builds: Check that the value is correct. */
3319 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3320#endif
3321
3322#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3323 /** @todo r=aeichner Implement for registers other than GPR as well. */
3324 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3325 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3326 && enmGstReg >= kIemNativeGstReg_GprFirst
3327 && enmGstReg <= kIemNativeGstReg_GprLast
3328 )
3329 {
3330# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3331 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3332 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3333# endif
3334 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3335 }
3336#endif
3337
3338 return idxReg;
3339 }
3340
3341 /*
3342 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3343 */
3344 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3345
3346 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3347 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3348
3349 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3350 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3351 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3352 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3353
3354#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3355 /** @todo r=aeichner Implement for registers other than GPR as well. */
3356 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3357 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3358 && enmGstReg >= kIemNativeGstReg_GprFirst
3359 && enmGstReg <= kIemNativeGstReg_GprLast
3360 )
3361 {
3362# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3363 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3364 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3365# endif
3366 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3367 }
3368#endif
3369
3370 return idxRegNew;
3371}
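
/*
 * Illustrative usage sketch (added for exposition, not part of the original
 * sources). Fetching a guest GPR for update: the returned host register
 * either already shadows the guest value or gets loaded here, and with
 * delayed writeback enabled it is marked dirty above. The X86_GREG_xAX
 * constant and the cast arithmetic on kIemNativeGstReg_GprFirst are
 * assumptions made for the example.
 *
 *      uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxRegRax ...
 *      iemNativeRegFreeTmp(pReNative, idxRegRax);
 */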
3372
3373
3374/**
3375 * Allocates a temporary host general purpose register that already holds the
3376 * given guest register value.
3377 *
3378 * The use case for this function is in places where the shadowing state cannot be
3379 * modified due to branching and such. This will fail if we don't have a
3380 * current shadow copy handy or if it's incompatible. The only code that will
3381 * be emitted here is value checking code in strict builds.
3382 *
3383 * The intended use can only be readonly!
3384 *
3385 * @returns The host register number, UINT8_MAX if not present.
3386 * @param pReNative The native recompile state.
3387 * @param poff Pointer to the instruction buffer offset.
3388 * Will be updated in strict builds if a register is
3389 * found.
3390 * @param enmGstReg The guest register whose shadow copy is wanted.
3391 * @note In strict builds, this may throw instruction buffer growth failures.
3392 * Non-strict builds will not throw anything.
3393 * @sa iemNativeRegAllocTmpForGuestReg
3394 */
3395DECL_HIDDEN_THROW(uint8_t)
3396iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3397{
3398 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3399#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3400 AssertMsg( pReNative->idxCurCall == 0
3401 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3402 || enmGstReg == kIemNativeGstReg_Pc,
3403 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3404#endif
3405
3406 /*
3407 * First check if the guest register value is already in a host register.
3408 */
3409 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3410 {
3411 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3412 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3413 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3414 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3415
3416 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3417 {
3418 /*
3419 * We only do readonly use here, so easy compared to the other
3420 * variant of this code.
3421 */
3422 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3423 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3424 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3425 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3426 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3427
3428#ifdef VBOX_STRICT
3429 /* Strict builds: Check that the value is correct. */
3430 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3431#else
3432 RT_NOREF(poff);
3433#endif
3434 return idxReg;
3435 }
3436 }
3437
3438 return UINT8_MAX;
3439}
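
/*
 * Illustrative usage sketch (added for exposition, not part of the original
 * sources). Unlike iemNativeRegAllocTmpForGuestReg, this variant may fail,
 * so a caller has to be prepared to fall back to a generic path:
 *
 *      uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxRegPc != UINT8_MAX)
 *      {
 *          // ... emit the optimized path using the existing shadow copy ...
 *          iemNativeRegFreeTmp(pReNative, idxRegPc);
 *      }
 *      else
 *      {
 *          // ... emit the generic path (no usable shadow copy) ...
 *      }
 */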
3440
3441
3442/**
3443 * Allocates argument registers for a function call.
3444 *
3445 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3446 * need to check the return value.
3447 * @param pReNative The native recompile state.
3448 * @param off The current code buffer offset.
3449 * @param cArgs The number of arguments the function call takes.
3450 */
3451DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3452{
3453 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3454 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3455 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3456 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3457
3458 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3459 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3460 else if (cArgs == 0)
3461 return off;
3462
3463 /*
3464 * Do we get lucky and all registers are free and not shadowing anything?
3465 */
3466 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3467 for (uint32_t i = 0; i < cArgs; i++)
3468 {
3469 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3470 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3471 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3472 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3473 }
3474 /*
3475 * Okay, not lucky so we have to free up the registers.
3476 */
3477 else
3478 for (uint32_t i = 0; i < cArgs; i++)
3479 {
3480 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3481 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3482 {
3483 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3484 {
3485 case kIemNativeWhat_Var:
3486 {
3487 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3488 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3489 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3490 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3491 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3492#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3493 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3494#endif
3495
3496 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3497 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3498 else
3499 {
3500 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3501 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3502 }
3503 break;
3504 }
3505
3506 case kIemNativeWhat_Tmp:
3507 case kIemNativeWhat_Arg:
3508 case kIemNativeWhat_rc:
3509 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3510 default:
3511 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3512 }
3513
3514 }
3515 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3516 {
3517 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3518 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3519 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3520#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3521 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3522#endif
3523 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3524 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3525 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3526 }
3527 else
3528 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3529 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3530 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3531 }
3532 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3533 return off;
3534}
3535
3536
3537DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3538
3539
3540#if 0
3541/**
3542 * Frees a register assignment of any type.
3543 *
3544 * @param pReNative The native recompile state.
3545 * @param idxHstReg The register to free.
3546 *
3547 * @note Does not update variables.
3548 */
3549DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3550{
3551 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3552 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3553 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3554 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3555 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3556 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3557 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3558 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3559 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3560 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3561 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3562 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3563 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3564 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3565
3566 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3567 /* no flushing, right:
3568 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3569 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3570 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3571 */
3572}
3573#endif
3574
3575
3576/**
3577 * Frees a temporary register.
3578 *
3579 * Any shadow copies of guest registers assigned to the host register will not
3580 * be flushed by this operation.
3581 */
3582DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3583{
3584 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3585 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3586 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3587 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3588 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3589}
3590
3591
3592/**
3593 * Frees a temporary immediate register.
3594 *
3595 * It is assumed that the caller has not modified the register, so it still holds
3596 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3597 */
3598DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3599{
3600 iemNativeRegFreeTmp(pReNative, idxHstReg);
3601}
3602
3603
3604/**
3605 * Frees a register assigned to a variable.
3606 *
3607 * The register will be disassociated from the variable.
3608 */
3609DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3610{
3611 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3612 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3613 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3614 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3615 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3616#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3617 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3618#endif
3619
3620 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3621 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3622 if (!fFlushShadows)
3623 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3624 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3625 else
3626 {
3627 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3628 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3629#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3630 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3631#endif
3632 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3633 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3634 uint64_t fGstRegShadows = fGstRegShadowsOld;
3635 while (fGstRegShadows)
3636 {
3637 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3638 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3639
3640 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3641 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3642 }
3643 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3644 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3645 }
3646}
3647
3648
3649#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3650# ifdef LOG_ENABLED
3651/** Host CPU SIMD register names. */
3652DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3653{
3654# ifdef RT_ARCH_AMD64
3655 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3656 # elif defined(RT_ARCH_ARM64)
3657 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3658 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3659# else
3660# error "port me"
3661# endif
3662};
3663# endif
3664
3665
3666/**
3667 * Frees a SIMD register assigned to a variable.
3668 *
3669 * The register will be disassociated from the variable.
3670 */
3671DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3672{
3673 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3674 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3675 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3676 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3677 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3678 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3679
3680 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3681 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3682 if (!fFlushShadows)
3683 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3684 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3685 else
3686 {
3687 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3688 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3689 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
3690 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
3691 uint64_t fGstRegShadows = fGstRegShadowsOld;
3692 while (fGstRegShadows)
3693 {
3694 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3695 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3696
3697 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
3698 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
3699 }
3700 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3701 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3702 }
3703}
3704
3705
3706/**
3707 * Reassigns a variable to a different SIMD register specified by the caller.
3708 *
3709 * @returns The new code buffer position.
3710 * @param pReNative The native recompile state.
3711 * @param off The current code buffer position.
3712 * @param idxVar The variable index.
3713 * @param idxRegOld The old host register number.
3714 * @param idxRegNew The new host register number.
3715 * @param pszCaller The caller for logging.
3716 */
3717static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3718 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3719{
3720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3721 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3722 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3723 RT_NOREF(pszCaller);
3724
3725 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3726 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
3727 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
3728
3729 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3730 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3731 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3732
3733 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3734 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
3736
3737 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
3738 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
3739 else
3740 {
3741 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
3742 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
3743 }
3744
3745 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3746 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3747 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
3748 if (fGstRegShadows)
3749 {
3750 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3751 | RT_BIT_32(idxRegNew);
3752 while (fGstRegShadows)
3753 {
3754 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3755 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3756
3757 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
3758 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
3759 }
3760 }
3761
3762 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3763 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3764 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
3765 return off;
3766}
3767
3768
3769/**
3770 * Moves a variable to a different register or spills it onto the stack.
3771 *
3772 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3773 * kinds can easily be recreated if needed later.
3774 *
3775 * @returns The new code buffer position.
3776 * @param pReNative The native recompile state.
3777 * @param off The current code buffer position.
3778 * @param idxVar The variable index.
3779 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3780 * call-volatile registers.
3781 */
3782DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3783 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
3784{
3785 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3786 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3787 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3788 Assert(!pVar->fRegAcquired);
3789 Assert(pVar->fSimdReg);
3790
3791 uint8_t const idxRegOld = pVar->idxReg;
3792 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
3793 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
3794 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3795 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
3796 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
3797 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3798 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
3799 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3800 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3801 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3802
3803 /** @todo Add statistics on this.*/
3804 /** @todo Implement basic variable liveness analysis (python) so variables
3805 * can be freed immediately once no longer used. Without it we risk
3806 * trashing registers and stack slots on dead variables.
3807 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3808
3809 /*
3810 * First try move it to a different register, as that's cheaper.
3811 */
3812 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3813 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
3814 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
3815 if (fRegs)
3816 {
3817 /* Avoid using shadow registers, if possible. */
3818 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
3819 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
3820 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3821 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
3822 }
3823
3824 /*
3825 * Otherwise we must spill the register onto the stack.
3826 */
3827 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3828 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3829 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3830
3831 if (pVar->cbVar == sizeof(RTUINT128U))
3832 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3833 else
3834 {
3835 Assert(pVar->cbVar == sizeof(RTUINT256U));
3836 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3837 }
3838
3839 pVar->idxReg = UINT8_MAX;
3840 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3841 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
3842 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3843 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3844 return off;
3845}
3846
3847
3848/**
3849 * Called right before emitting a call instruction to move anything important
3850 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
3851 * optionally freeing argument variables.
3852 *
3853 * @returns New code buffer offset, UINT32_MAX on failure.
3854 * @param pReNative The native recompile state.
3855 * @param off The code buffer offset.
3856 * @param cArgs The number of arguments the function call takes.
3857 * It is presumed that the host register part of these has
3858 * been allocated as such already and won't need moving,
3859 * just freeing.
3860 * @param fKeepVars Mask of variables that should keep their register
3861 * assignments. Caller must take care to handle these.
3862 */
3863DECL_HIDDEN_THROW(uint32_t)
3864iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
3865{
3866 Assert(!cArgs); RT_NOREF(cArgs);
3867
3868 /* fKeepVars will reduce this mask. */
3869 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
3870
3871 /*
3872 * Move anything important out of volatile registers.
3873 */
3874 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
3875#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3876 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
3877#endif
3878 ;
3879
3880 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
3881 if (!fSimdRegsToMove)
3882 { /* likely */ }
3883 else
3884 {
3885 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
3886 while (fSimdRegsToMove != 0)
3887 {
3888 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
3889 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
3890
3891 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
3892 {
3893 case kIemNativeWhat_Var:
3894 {
3895 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
3896 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3897 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3898 Assert(pVar->idxReg == idxSimdReg);
3899 Assert(pVar->fSimdReg);
3900 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
3901 {
3902 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
3903 idxVar, pVar->enmKind, pVar->idxReg));
3904 if (pVar->enmKind != kIemNativeVarKind_Stack)
3905 pVar->idxReg = UINT8_MAX;
3906 else
3907 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
3908 }
3909 else
3910 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
3911 continue;
3912 }
3913
3914 case kIemNativeWhat_Arg:
3915 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
3916 continue;
3917
3918 case kIemNativeWhat_rc:
3919 case kIemNativeWhat_Tmp:
3920 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
3921 continue;
3922
3923 case kIemNativeWhat_FixedReserved:
3924#ifdef RT_ARCH_ARM64
3925 continue; /* On ARM the upper half of the virtual 256-bit register. */
3926#endif
3927
3928 case kIemNativeWhat_FixedTmp:
3929 case kIemNativeWhat_pVCpuFixed:
3930 case kIemNativeWhat_pCtxFixed:
3931 case kIemNativeWhat_PcShadow:
3932 case kIemNativeWhat_Invalid:
3933 case kIemNativeWhat_End:
3934 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3935 }
3936 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3937 }
3938 }
3939
3940 /*
3941 * Do the actual freeing.
3942 */
3943 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
3944 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
3945 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
3946 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
3947
3948 /* If there are guest register shadows in any call-volatile register, we
3949 have to clear the corresponding guest register masks for each register. */
3950 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
3951 if (fHstSimdRegsWithGstShadow)
3952 {
3953 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3954 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
3955 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
3956 do
3957 {
3958 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
3959 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
3960
3961 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
3962
3963#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3964 /*
3965 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
3966 * to call volatile registers).
3967 */
3968 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3969 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
3970 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
3971#endif
3972 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3973 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
3974
3975 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
3976 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
3977 } while (fHstSimdRegsWithGstShadow != 0);
3978 }
3979
3980 return off;
3981}
3982#endif
3983
3984
3985/**
3986 * Called right before emitting a call instruction to move anything important
3987 * out of call-volatile registers, free and flush the call-volatile registers,
3988 * optionally freeing argument variables.
3989 *
3990 * @returns New code buffer offset, UINT32_MAX on failure.
3991 * @param pReNative The native recompile state.
3992 * @param off The code buffer offset.
3993 * @param cArgs The number of arguments the function call takes.
3994 * It is presumed that the host register part of these has
3995 * been allocated as such already and won't need moving,
3996 * just freeing.
3997 * @param fKeepVars Mask of variables that should keep their register
3998 * assignments. Caller must take care to handle these.
3999 */
4000DECL_HIDDEN_THROW(uint32_t)
4001iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4002{
4003 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4004
4005 /* fKeepVars will reduce this mask. */
4006 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4007
4008 /*
4009 * Move anything important out of volatile registers.
4010 */
4011 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4012 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4013 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4014#ifdef IEMNATIVE_REG_FIXED_TMP0
4015 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4016#endif
4017#ifdef IEMNATIVE_REG_FIXED_TMP1
4018 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4019#endif
4020#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4021 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4022#endif
4023 & ~g_afIemNativeCallRegs[cArgs];
4024
4025 fRegsToMove &= pReNative->Core.bmHstRegs;
4026 if (!fRegsToMove)
4027 { /* likely */ }
4028 else
4029 {
4030 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4031 while (fRegsToMove != 0)
4032 {
4033 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4034 fRegsToMove &= ~RT_BIT_32(idxReg);
4035
4036 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4037 {
4038 case kIemNativeWhat_Var:
4039 {
4040 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4041 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4042 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4043 Assert(pVar->idxReg == idxReg);
4044#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4045 Assert(!pVar->fSimdReg);
4046#endif
4047 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4048 {
4049 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4050 idxVar, pVar->enmKind, pVar->idxReg));
4051 if (pVar->enmKind != kIemNativeVarKind_Stack)
4052 pVar->idxReg = UINT8_MAX;
4053 else
4054 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4055 }
4056 else
4057 fRegsToFree &= ~RT_BIT_32(idxReg);
4058 continue;
4059 }
4060
4061 case kIemNativeWhat_Arg:
4062 AssertMsgFailed(("What?!?: %u\n", idxReg));
4063 continue;
4064
4065 case kIemNativeWhat_rc:
4066 case kIemNativeWhat_Tmp:
4067 AssertMsgFailed(("Missing free: %u\n", idxReg));
4068 continue;
4069
4070 case kIemNativeWhat_FixedTmp:
4071 case kIemNativeWhat_pVCpuFixed:
4072 case kIemNativeWhat_pCtxFixed:
4073 case kIemNativeWhat_PcShadow:
4074 case kIemNativeWhat_FixedReserved:
4075 case kIemNativeWhat_Invalid:
4076 case kIemNativeWhat_End:
4077 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4078 }
4079 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4080 }
4081 }
4082
4083 /*
4084 * Do the actual freeing.
4085 */
4086 if (pReNative->Core.bmHstRegs & fRegsToFree)
4087 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4088 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4089 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4090
4091 /* If there are guest register shadows in any call-volatile register, we
4092 have to clear the corresponding guest register masks for each register. */
4093 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4094 if (fHstRegsWithGstShadow)
4095 {
4096 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4097 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4098 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4099 do
4100 {
4101 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4102 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4103
4104 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4105
4106#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4107 /*
4108 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4109 * to call volatile registers).
4110 */
4111 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4112 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4113 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4114#endif
4115
4116 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4117 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4118 } while (fHstRegsWithGstShadow != 0);
4119 }
4120
4121#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4122 /* Now for the SIMD registers, no argument support for now. */
4123 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4124#endif
4125
4126 return off;
4127}
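
/*
 * Illustrative sketch (added for exposition, not part of the original
 * sources). The typical sequence around a threaded-function or
 * C-implementation call is to clear out the call-volatile registers first
 * and to drop any guest shadows the callee may have modified afterwards
 * (see iemNativeRegFlushGuestShadows below). The argument-loading and
 * call-emitting steps, and the fGstRegsClobberedByCallee mask, are only
 * hinted at / hypothetical.
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
 *      // ... load the argument registers and emit the call itself ...
 *      iemNativeRegFlushGuestShadows(pReNative, fGstRegsClobberedByCallee);
 */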
4128
4129
4130/**
4131 * Flushes a set of guest register shadow copies.
4132 *
4133 * This is usually done after calling a threaded function or a C-implementation
4134 * of an instruction.
4135 *
4136 * @param pReNative The native recompile state.
4137 * @param fGstRegs Set of guest registers to flush.
4138 */
4139DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4140{
4141 /*
4142 * Reduce the mask by what's currently shadowed
4143 */
4144 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4145 fGstRegs &= bmGstRegShadowsOld;
4146 if (fGstRegs)
4147 {
4148 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4149 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4150 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4151 if (bmGstRegShadowsNew)
4152 {
4153 /*
4154 * Partial.
4155 */
4156 do
4157 {
4158 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4159 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4160 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4161 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4162 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4163#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4164 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4165#endif
4166
4167 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4168 fGstRegs &= ~fInThisHstReg;
4169 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4170 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4171 if (!fGstRegShadowsNew)
4172 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4173 } while (fGstRegs != 0);
4174 }
4175 else
4176 {
4177 /*
4178 * Clear all.
4179 */
4180 do
4181 {
4182 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4183 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4184 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4185 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4186 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4187#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4188 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4189#endif
4190
4191 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4192 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4193 } while (fGstRegs != 0);
4194 pReNative->Core.bmHstRegsWithGstShadow = 0;
4195 }
4196 }
4197}
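
/*
 * Illustrative sketch (added for exposition, not part of the original
 * sources). The flush mask is a bitmap indexed by IEMNATIVEGSTREG, so
 * flushing, say, the PC and one GPR shadow could look like this (the exact
 * GPR enum arithmetic and X86_GREG_xSP usage are assumptions):
 *
 *      iemNativeRegFlushGuestShadows(pReNative,
 *                                    RT_BIT_64(kIemNativeGstReg_Pc)
 *                                    | RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP));
 */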
4198
4199
4200/**
4201 * Flushes guest register shadow copies held by a set of host registers.
4202 *
4203 * This is used with the TLB lookup code for ensuring that we don't carry on
4204 * with any guest shadows in volatile registers, as these will get corrupted by
4205 * a TLB miss.
4206 *
4207 * @param pReNative The native recompile state.
4208 * @param fHstRegs Set of host registers to flush guest shadows for.
4209 */
4210DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4211{
4212 /*
4213 * Reduce the mask by what's currently shadowed.
4214 */
4215 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4216 fHstRegs &= bmHstRegsWithGstShadowOld;
4217 if (fHstRegs)
4218 {
4219 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4220 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4221 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4222 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4223 if (bmHstRegsWithGstShadowNew)
4224 {
4225 /*
4226 * Partial (likely).
4227 */
4228 uint64_t fGstShadows = 0;
4229 do
4230 {
4231 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4232 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4233 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4234 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4235#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4236 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4237#endif
4238
4239 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4240 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4241 fHstRegs &= ~RT_BIT_32(idxHstReg);
4242 } while (fHstRegs != 0);
4243 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4244 }
4245 else
4246 {
4247 /*
4248 * Clear all.
4249 */
4250 do
4251 {
4252 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4253 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4254 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4255 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4256#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4257 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4258#endif
4259
4260 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4261 fHstRegs &= ~RT_BIT_32(idxHstReg);
4262 } while (fHstRegs != 0);
4263 pReNative->Core.bmGstRegShadows = 0;
4264 }
4265 }
4266}
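
/*
 * Illustrative sketch (added for exposition, not part of the original
 * sources). The TLB lookup code mentioned above would drop volatile-register
 * shadows before branching to the slow path, roughly:
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 *      // ... the TLB-miss path may now clobber the volatile registers freely ...
 */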
4267
4268
4269/**
4270 * Restores guest shadow copies in volatile registers.
4271 *
4272 * This is used after calling a helper function (think TLB miss) to restore the
4273 * register state of volatile registers.
4274 *
4275 * @param pReNative The native recompile state.
4276 * @param off The code buffer offset.
4277 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4278 * be active (allocated) w/o asserting. Hack.
4279 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4280 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4281 */
4282DECL_HIDDEN_THROW(uint32_t)
4283iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4284{
4285 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4286 if (fHstRegs)
4287 {
4288 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4289 do
4290 {
4291 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4292
4293 /* It's not fatal if a register is active holding a variable that is
4294 shadowing a guest register, ASSUMING all pending guest register
4295 writes were flushed prior to the helper call. However, we'll be
4296 emitting duplicate restores, so it wastes code space. */
4297 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4298 RT_NOREF(fHstRegsActiveShadows);
4299
4300 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4301#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4302 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4303#endif
4304 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4305 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4307
4308 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4309 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4310
4311 fHstRegs &= ~RT_BIT_32(idxHstReg);
4312 } while (fHstRegs != 0);
4313 }
4314 return off;
4315}
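/*
 * Usage sketch (not built): the save/call/restore bracket hinted at by the @see references
 * above. Only the restore call uses a signature taken from this file; the variable save/restore
 * and helper-call steps are left as comments since their exact signatures are not shown here.
 */
#if 0
static uint32_t iemNativeExampleHelperCallBracket(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* 1. Save variable values living in volatile registers (iemNativeVarSaveVolatileRegsPreHlpCall). */
    /* 2. Emit the actual helper call (a TLB miss handler, say). */
    /* 3. Restore the variable values (iemNativeVarRestoreVolatileRegsPostHlpCall). */
    /* 4. Reload the guest shadows that lived in volatile registers before the call: */
    return iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
}
#endif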
4316
4317
4318
4319
4320/*********************************************************************************************************************************
4321* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4322*********************************************************************************************************************************/
4323#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4324
4325/**
4326 * Info about shadowed guest SIMD register values.
4327 * @see IEMNATIVEGSTSIMDREG
4328 */
4329static struct
4330{
4331 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4332 uint32_t offXmm;
4333 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4334 uint32_t offYmm;
4335 /** Name (for logging). */
4336 const char *pszName;
4337} const g_aGstSimdShadowInfo[] =
4338{
4339#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4340 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4341 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4342 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4343 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4344 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4345 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4346 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4347 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4348 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4349 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4350 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4351 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4352 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4353 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4354 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4355 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4356 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4357#undef CPUMCTX_OFF_AND_SIZE
4358};
4359AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4360
4361
4362/**
4363 * Frees a temporary SIMD register.
4364 *
4365 * Any shadow copies of guest registers assigned to the host register will not
4366 * be flushed by this operation.
4367 */
4368DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4369{
4370 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4371 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4372 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4373 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4374 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4375}
4376
4377
4378/**
4379 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4380 *
4381 * @returns New code buffer offset.
4382 * @param pReNative The native recompile state.
4383 * @param off Current code buffer position.
4384 * @param enmGstSimdReg The guest SIMD register to flush.
4385 */
4386DECL_HIDDEN_THROW(uint32_t)
4387iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4388{
4389 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4390
4391 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4392 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4393 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4394 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4395
4396 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4397 {
4398 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4399 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4400 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4401 }
4402
4403 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4404 {
4405 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4406 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4407 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4408 }
4409
4410 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4411 return off;
4412}
4413
4414
4415/**
4416 * Flush the given set of guest SIMD registers if marked as dirty.
4417 *
4418 * @returns New code buffer offset.
4419 * @param pReNative The native recompile state.
4420 * @param off Current code buffer position.
4421 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4422 */
4423DECL_HIDDEN_THROW(uint32_t)
4424iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4425{
4426 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4427 & fFlushGstSimdReg;
4428 if (bmGstSimdRegShadowDirty)
4429 {
4430# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4431 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4432 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4433# endif
4434
4435 do
4436 {
4437 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4438 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4439 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4440 } while (bmGstSimdRegShadowDirty);
4441 }
4442
4443 return off;
4444}
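/*
 * Usage sketch (not built): writing back dirty guest SIMD shadows, either all of them or only
 * a subset, before emitting code that inspects the state in CPUMCTX. The choice of ymm0/ymm1
 * for the subset is an arbitrary example.
 */
#if 0
static uint32_t iemNativeExampleFlushDirtySimd(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Flush everything that is dirty: */
    off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
    /* Or restrict the flush to ymm0 and ymm1: */
    return iemNativeSimdRegFlushDirtyGuest(pReNative, off,
                                           RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)) | RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1)));
}
#endif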
4445
4446
4447#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4448/**
4449 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4450 *
4451 * @returns New code buffer offset.
4452 * @param pReNative The native recompile state.
4453 * @param off Current code buffer position.
4454 * @param idxHstSimdReg The host SIMD register.
4455 *
4456 * @note This doesn't do any unshadowing of guest registers from the host register.
4457 */
4458DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4459{
4460 /* We need to flush any pending guest register writes this host register shadows. */
4461 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4462 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4463 if (bmGstSimdRegShadowDirty)
4464 {
4465# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4466 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4467 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4468# endif
4469
4470 do
4471 {
4472 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4473 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4474 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4475 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4476 } while (bmGstSimdRegShadowDirty);
4477 }
4478
4479 return off;
4480}
4481#endif
4482
4483
4484/**
4485 * Locate a register, possibly freeing one up.
4486 *
4487 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4488 * failed.
4489 *
4490 * @returns Host register number on success. Returns UINT8_MAX if no registers
4491 * found, the caller is supposed to deal with this and raise an
4492 * allocation type specific status code (if desired).
4493 *
4494 * @throws VBox status code if we run into trouble spilling a variable or
4495 * recording debug info. Does NOT throw anything if we're out of
4496 * registers, though.
4497 */
4498static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4499 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4500{
4501 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4502 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4503 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4504
4505 /*
4506 * Try a freed register that's shadowing a guest register.
4507 */
4508 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4509 if (fRegs)
4510 {
4511 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4512
4513#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4514 /*
4515 * When we have liveness information, we use it to kick out all shadowed
4516 * guest registers that will not be needed any more in this TB. If we're
4517 * lucky, this may prevent us from ending up here again.
4518 *
4519 * Note! We must consider the previous entry here so we don't free
4520 * anything that the current threaded function requires (current
4521 * entry is produced by the next threaded function).
4522 */
4523 uint32_t const idxCurCall = pReNative->idxCurCall;
4524 if (idxCurCall > 0)
4525 {
4526 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4527
4528# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4529 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4530 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4531 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4532#else
4533 /* Construct a mask of the registers not in the read or write state.
4534 Note! We could skip writes, if they aren't from us, as this is just
4535 a hack to prevent trashing registers that have just been written
4536 or will be written when we retire the current instruction. */
4537 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4538 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4539 & IEMLIVENESSBIT_MASK;
4540#endif
4541 /* If it matches any shadowed registers. */
4542 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4543 {
4544 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4545 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4546 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4547
4548 /* See if we've got any unshadowed registers we can return now. */
4549 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4550 if (fUnshadowedRegs)
4551 {
4552 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4553 return (fPreferVolatile
4554 ? ASMBitFirstSetU32(fUnshadowedRegs)
4555 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4556 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4557 - 1;
4558 }
4559 }
4560 }
4561#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4562
4563 unsigned const idxReg = (fPreferVolatile
4564 ? ASMBitFirstSetU32(fRegs)
4565 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4566 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4567 - 1;
4568
4569 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4570 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4571 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4572 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4573
4574 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4575 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4576
4577 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4578 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4579 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4580 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4581 return idxReg;
4582 }
4583
4584 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4585
4586 /*
4587 * Try free up a variable that's in a register.
4588 *
4589 * We do two rounds here, first evacuating variables we don't need to be
4590 * saved on the stack, then in the second round move things to the stack.
4591 */
4592 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4593 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4594 {
4595 uint32_t fVars = pReNative->Core.bmVars;
4596 while (fVars)
4597 {
4598 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4599 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4600 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here (clear the bit or we'd loop forever). */
4601 { fVars &= ~RT_BIT_32(idxVar); continue; }
4602
4603 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4604 && (RT_BIT_32(idxReg) & fRegMask)
4605 && ( iLoop == 0
4606 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4607 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4608 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4609 {
4610 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4611 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4612 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4613 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4614 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4615 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4616
4617 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4618 {
4619 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4620 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4621 }
4622
4623 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4624 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4625
4626 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4627 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4628 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4629 return idxReg;
4630 }
4631 fVars &= ~RT_BIT_32(idxVar);
4632 }
4633 }
4634
4635 AssertFailed();
4636 return UINT8_MAX;
4637}
4638
4639
4640/**
4641 * Flushes a set of guest register shadow copies.
4642 *
4643 * This is usually done after calling a threaded function or a C-implementation
4644 * of an instruction.
4645 *
4646 * @param pReNative The native recompile state.
4647 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4648 */
4649DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4650{
4651 /*
4652 * Reduce the mask by what's currently shadowed
4653 */
4654 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4655 fGstSimdRegs &= bmGstSimdRegShadows;
4656 if (fGstSimdRegs)
4657 {
4658 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4659 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4660 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4661 if (bmGstSimdRegShadowsNew)
4662 {
4663 /*
4664 * Partial.
4665 */
4666 do
4667 {
4668 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4669 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4670 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4671 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4672 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4673 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4674
4675 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4676 fGstSimdRegs &= ~fInThisHstReg;
4677 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4678 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4679 if (!fGstRegShadowsNew)
4680 {
4681 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4682 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4683 }
4684 } while (fGstSimdRegs != 0);
4685 }
4686 else
4687 {
4688 /*
4689 * Clear all.
4690 */
4691 do
4692 {
4693 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4694 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4695 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4696 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4697 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4698 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4699
4700 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4701 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4702 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4703 } while (fGstSimdRegs != 0);
4704 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
4705 }
4706 }
4707}
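/*
 * Usage sketch (not built): dropping all guest SIMD shadow associations after a call that may
 * have modified the guest state behind the recompiler's back, as described above for threaded
 * functions and C-implementations.
 */
#if 0
static void iemNativeExampleDropAllSimdShadows(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeSimdRegFlushGuestShadows(pReNative, UINT64_MAX);
}
#endif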
4708
4709
4710/**
4711 * Allocates a temporary host SIMD register.
4712 *
4713 * This may emit code to save register content onto the stack in order to free
4714 * up a register.
4715 *
4716 * @returns The host register number; throws VBox status code on failure,
4717 * so no need to check the return value.
4718 * @param pReNative The native recompile state.
4719 * @param poff Pointer to the variable with the code buffer position.
4720 * This will be update if we need to move a variable from
4721 * register to stack in order to satisfy the request.
4722 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4723 * registers (@c true, default) or the other way around
4724 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4725 */
4726DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4727{
4728 /*
4729 * Try find a completely unused register, preferably a call-volatile one.
4730 */
4731 uint8_t idxSimdReg;
4732 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4733 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4734 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
4735 if (fRegs)
4736 {
4737 if (fPreferVolatile)
4738 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4739 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4740 else
4741 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4742 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4743 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4744 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4745
4746 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4747 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4748 }
4749 else
4750 {
4751 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
4752 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4753 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4754 }
4755
4756 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4757 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4758}
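/*
 * Usage sketch (not built): the typical allocate/use/free cycle for a temporary host SIMD
 * register; the operation emitted in the middle is a placeholder.
 */
#if 0
static uint32_t iemNativeExampleTmpSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit code using idxSimdRegTmp as scratch here ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
    return off;
}
#endif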
4759
4760
4761/**
4762 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
4763 * registers.
4764 *
4765 * @returns The host register number; throws VBox status code on failure,
4766 * so no need to check the return value.
4767 * @param pReNative The native recompile state.
4768 * @param poff Pointer to the variable with the code buffer position.
4769 * This will be update if we need to move a variable from
4770 * register to stack in order to satisfy the request.
4771 * @param fRegMask Mask of acceptable registers.
4772 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4773 * registers (@c true, default) or the other way around
4774 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4775 */
4776DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4777 bool fPreferVolatile /*= true*/)
4778{
4779 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4780 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4781
4782 /*
4783 * Try find a completely unused register, preferably a call-volatile one.
4784 */
4785 uint8_t idxSimdReg;
4786 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4787 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4788 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
4789 & fRegMask;
4790 if (fRegs)
4791 {
4792 if (fPreferVolatile)
4793 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4794 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4795 else
4796 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4797 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4798 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4799 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4800
4801 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4802 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4803 }
4804 else
4805 {
4806 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4807 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4808 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4809 }
4810
4811 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4812 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4813}
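/*
 * Usage sketch (not built): restricting the allocation to non-volatile host SIMD registers,
 * e.g. when the value has to survive a helper call. The mask combination mirrors the one used
 * by iemNativeSimdRegAllocTmpForGuestSimdReg() below for the fNoVolatileRegs case.
 */
#if 0
static uint32_t iemNativeExampleTmpSimdRegNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t *pidxSimdReg)
{
    *pidxSimdReg = iemNativeSimdRegAllocTmpEx(pReNative, &off,
                                              IEMNATIVE_HST_SIMD_REG_MASK
                                              & ~IEMNATIVE_SIMD_REG_FIXED_MASK
                                              & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
    return off;
}
#endif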
4814
4815
4816/**
4817 * Sets the indicator for which part of the given SIMD register has valid data loaded.
4818 *
4819 * @param pReNative The native recompile state.
4820 * @param idxHstSimdReg The host SIMD register to update the state for.
4821 * @param enmLoadSz The load size to set.
4822 */
4823DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
4824 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
4825{
4826 /* Everything valid already? -> nothing to do. */
4827 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4828 return;
4829
4830 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
4831 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
4832 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
4833 {
4834 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
4835 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
4836 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
4837 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
4838 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
4839 }
4840}
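/*
 * Behaviour sketch (not built): the load-size state combines like a tiny lattice. Loading the
 * low and then the high 128 bits of a host SIMD register leaves it marked as holding the full
 * 256 bits, whichever state it started in.
 */
#if 0
static void iemNativeExampleLoadFlagCombine(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg)
{
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
    Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256);
}
#endif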
4841
4842
4843static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
4844 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
4845{
4846 /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
4847 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
4848 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4849 {
4850# ifdef RT_ARCH_ARM64
4851 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
4852 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
4853# endif
4854
4855 if (idxHstSimdRegDst != idxHstSimdRegSrc)
4856 {
4857 switch (enmLoadSzDst)
4858 {
4859 case kIemNativeGstSimdRegLdStSz_256:
4860 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4861 break;
4862 case kIemNativeGstSimdRegLdStSz_Low128:
4863 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4864 break;
4865 case kIemNativeGstSimdRegLdStSz_High128:
4866 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4867 break;
4868 default:
4869 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4870 }
4871
4872 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
4873 }
4874 }
4875 else
4876 {
4877 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
4878 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
4879 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
4880 }
4881
4882 return off;
4883}
4884
4885
4886/**
4887 * Allocates a temporary host SIMD register for keeping a guest
4888 * SIMD register value.
4889 *
4890 * Since we may already have a register holding the guest register value,
4891 * code will be emitted to do the loading if that's not the case. Code may also
4892 * be emitted if we have to free up a register to satisfy the request.
4893 *
4894 * @returns The host register number; throws VBox status code on failure, so no
4895 * need to check the return value.
4896 * @param pReNative The native recompile state.
4897 * @param poff Pointer to the variable with the code buffer
4898 * position. This will be updated if we need to move a
4899 * variable from register to stack in order to satisfy
4900 * the request.
4901 * @param enmGstSimdReg The guest SIMD register that is to be updated.
4902 * @param enmIntendedUse How the caller will be using the host register.
4903 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4904 * register is okay (default). The ASSUMPTION here is
4905 * that the caller has already flushed all volatile
4906 * registers, so this is only applied if we allocate a
4907 * new register.
4908 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4909 */
4910DECL_HIDDEN_THROW(uint8_t)
4911iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
4912 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4913 bool fNoVolatileRegs /*= false*/)
4914{
4915 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
4916#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
4917 AssertMsg( pReNative->idxCurCall == 0
4918 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4919 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4920 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4921 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4922 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
4923 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
4924#endif
4925#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4926 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4927#endif
4928 uint32_t const fRegMask = !fNoVolatileRegs
4929 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
4930 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4931
4932 /*
4933 * First check if the guest register value is already in a host register.
4934 */
4935 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
4936 {
4937 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4938 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4939 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
4940 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
4941
4942 /* It's not supposed to be allocated... */
4943 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
4944 {
4945 /*
4946 * If the register will trash the guest shadow copy, try find a
4947 * completely unused register we can use instead. If that fails,
4948 * we need to disassociate the host reg from the guest reg.
4949 */
4950 /** @todo would be nice to know if preserving the register is in any way helpful. */
4951 /* If the purpose is calculations, try duplicate the register value as
4952 we'll be clobbering the shadow. */
4953 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4954 && ( ~pReNative->Core.bmHstSimdRegs
4955 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4956 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
4957 {
4958 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
4959
4960 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
4961
4962 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
4963 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
4964 g_apszIemNativeHstSimdRegNames[idxRegNew]));
4965 idxSimdReg = idxRegNew;
4966 }
4967 /* If the current register matches the restrictions, go ahead and allocate
4968 it for the caller. */
4969 else if (fRegMask & RT_BIT_32(idxSimdReg))
4970 {
4971 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
4972 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
4973 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4974 {
4975 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4976 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
4977 else
4978 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
4979 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
4980 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4981 }
4982 else
4983 {
4984 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
4985 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
4986 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
4987 }
4988 }
4989 /* Otherwise, allocate a register that satisfies the caller and transfer
4990 the shadowing if compatible with the intended use. (This basically
4991 means the call wants a non-volatile register (RSP push/pop scenario).) */
4992 else
4993 {
4994 Assert(fNoVolatileRegs);
4995 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
4996 !fNoVolatileRegs
4997 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4998 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
4999 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5000 {
5001 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5002 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5003 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5004 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5005 }
5006 else
5007 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5008 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5009 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5010 idxSimdReg = idxRegNew;
5011 }
5012 }
5013 else
5014 {
5015 /*
5016 * Oops. Shadowed guest register already allocated!
5017 *
5018 * Allocate a new register, copy the value and, if updating, the
5019 * guest shadow copy assignment to the new register.
5020 */
5021 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5022 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5023 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5024 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5025
5026 /** @todo share register for readonly access. */
5027 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5028 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5029
5030 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5031 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5032 else
5033 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5034
5035 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5036 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5037 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5038 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5039 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5040 else
5041 {
5042 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5043 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5044 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5045 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5046 }
5047 idxSimdReg = idxRegNew;
5048 }
5049 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5050
5051#ifdef VBOX_STRICT
5052 /* Strict builds: Check that the value is correct. */
5053 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5054 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5055#endif
5056
5057 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5058 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5059 {
5060# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5061 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5062 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5063# endif
5064
5065 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5066 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5067 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5068 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5069 else
5070 {
5071 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5072 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5073 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5074 }
5075 }
5076
5077 return idxSimdReg;
5078 }
5079
5080 /*
5081 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5082 */
5083 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5084
5085 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5086 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5087 else
5088 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5089
5090 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5091 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5092
5093 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5094 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5095 {
5096# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5097 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5098 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5099# endif
5100
5101 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5102 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5103 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5104 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5105 else
5106 {
5107 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5108 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5109 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5110 }
5111 }
5112
5113 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5114 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5115
5116 return idxRegNew;
5117}
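/*
 * Usage sketch (not built): the two most common ways of asking for a guest SIMD register
 * shadow: read-only access to the low half, and a full 256-bit update. Register 1 (ymm1) is an
 * arbitrary example, and freeing the temporary afterwards follows the pattern of the GPR
 * allocator.
 */
#if 0
static uint32_t iemNativeExampleGuestSimdRegUse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Read-only access to the low 128 bits (xmm1): */
    uint8_t const idxRegRo = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                     kIemNativeGstSimdRegLdStSz_Low128,
                                                                     kIemNativeGstRegUse_ReadOnly);
    /* ... emit code reading idxRegRo ... */
    iemNativeSimdRegFreeTmp(pReNative, idxRegRo);

    /* Full 256-bit read-modify-write access; this marks the shadow dirty for later writeback: */
    uint8_t const idxRegUpd = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                      kIemNativeGstSimdRegLdStSz_256,
                                                                      kIemNativeGstRegUse_ForUpdate);
    /* ... emit the modification here ... */
    iemNativeSimdRegFreeTmp(pReNative, idxRegUpd);
    return off;
}
#endif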
5118
5119
5120/**
5121 * Flushes guest SIMD register shadow copies held by a set of host registers.
5122 *
5123 * This is used when calling an external helper to ensure that we don't carry on
5124 * with any guest shadows in volatile registers, as these will get corrupted by the called helper.
5125 *
5126 * @param pReNative The native recompile state.
5127 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5128 */
5129DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5130{
5131 /*
5132 * Reduce the mask by what's currently shadowed.
5133 */
5134 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5135 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5136 if (fHstSimdRegs)
5137 {
5138 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5139 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5140 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5141 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5142 if (bmHstSimdRegsWithGstShadowNew)
5143 {
5144 /*
5145 * Partial (likely).
5146 */
5147 uint64_t fGstShadows = 0;
5148 do
5149 {
5150 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5151 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5152 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5153 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5154 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5155 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5156
5157 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5158 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5159 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5160 } while (fHstSimdRegs != 0);
5161 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5162 }
5163 else
5164 {
5165 /*
5166 * Clear all.
5167 */
5168 do
5169 {
5170 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5171 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5172 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5173 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5174 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5175 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5176
5177 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5178 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5179 } while (fHstSimdRegs != 0);
5180 pReNative->Core.bmGstSimdRegShadows = 0;
5181 }
5182 }
5183}
5184#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5185
5186
5187
5188/*********************************************************************************************************************************
5189* Code emitters for flushing pending guest register writes and sanity checks *
5190*********************************************************************************************************************************/
5191
5192#ifdef VBOX_STRICT
5193/**
5194 * Does internal register allocator sanity checks.
5195 */
5196DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5197{
5198 /*
5199 * Iterate host registers building a guest shadowing set.
5200 */
5201 uint64_t bmGstRegShadows = 0;
5202 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5203 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5204 while (bmHstRegsWithGstShadow)
5205 {
5206 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5207 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5208 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5209
5210 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5211 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5212 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5213 bmGstRegShadows |= fThisGstRegShadows;
5214 while (fThisGstRegShadows)
5215 {
5216 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5217 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5218 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5219 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5220 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5221 }
5222 }
5223 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5224 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5225 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5226
5227 /*
5228 * Now the other way around, checking the guest to host index array.
5229 */
5230 bmHstRegsWithGstShadow = 0;
5231 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5232 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5233 while (bmGstRegShadows)
5234 {
5235 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5236 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5237 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5238
5239 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5240 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5241 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5242 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5243 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5244 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5245 }
5246 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5247 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5248 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5249}
5250#endif /* VBOX_STRICT */
5251
5252
5253/**
5254 * Flushes any delayed guest register writes.
5255 *
5256 * This must be called prior to calling CImpl functions and any helpers that use
5257 * the guest state (like raising exceptions) and such.
5258 *
5259 * @note This function does not flush any shadowing information for guest registers; that
5260 * is left to the caller if desired.
5261 */
5262DECL_HIDDEN_THROW(uint32_t)
5263iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5264{
5265#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5266 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5267 off = iemNativeEmitPcWriteback(pReNative, off);
5268#else
5269 RT_NOREF(pReNative, fGstShwExcept);
5270#endif
5271
5272#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5273 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5274#endif
5275
5276#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5277 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5278#endif
5279
5280 return off;
5281}
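/*
 * Usage sketch (not built): flushing every delayed guest register write before emitting a call
 * to a C-implementation helper, excepting nothing. Real call sites typically reach this slow
 * path through a cheaper wrapper that first checks whether anything is actually pending.
 */
#if 0
static uint32_t iemNativeExampleFlushBeforeCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0 /*fGstShwExcept*/, 0 /*fGstSimdShwExcept*/);
    /* ... emit the call to the C-impl function here ... */
    return off;
}
#endif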
5282
5283
5284#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5285/**
5286 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5287 */
5288DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5289{
5290 Assert(pReNative->Core.offPc);
5291# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5292 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5293 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5294# endif
5295
5296# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5297 /* Allocate a temporary PC register. */
5298 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5299
5300 /* Perform the addition and store the result. */
5301 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5302 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5303
5304 /* Free but don't flush the PC register. */
5305 iemNativeRegFreeTmp(pReNative, idxPcReg);
5306# else
5307 /* Compare the shadow with the context value, they should match. */
5308 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5309 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5310# endif
5311
5312 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5313 pReNative->Core.offPc = 0;
5314 pReNative->Core.cInstrPcUpdateSkipped = 0;
5315
5316 return off;
5317}
5318#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5319
5320
5321/*********************************************************************************************************************************
5322* Code Emitters (larger snippets) *
5323*********************************************************************************************************************************/
5324
5325/**
5326 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5327 * extending to 64-bit width.
5328 *
5329 * @returns New code buffer offset on success, UINT32_MAX on failure.
5330 * @param pReNative The native recompile state.
5331 * @param off The current code buffer position.
5332 * @param idxHstReg The host register to load the guest register value into.
5333 * @param enmGstReg The guest register to load.
5334 *
5335 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5336 * that is something the caller needs to do if applicable.
5337 */
5338DECL_HIDDEN_THROW(uint32_t)
5339iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5340{
5341 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5342 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5343
5344 switch (g_aGstShadowInfo[enmGstReg].cb)
5345 {
5346 case sizeof(uint64_t):
5347 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5348 case sizeof(uint32_t):
5349 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5350 case sizeof(uint16_t):
5351 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5352#if 0 /* not present in the table. */
5353 case sizeof(uint8_t):
5354 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5355#endif
5356 default:
5357 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5358 }
5359}
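/*
 * Usage sketch (not built): loading a guest register value into the fixed scratch register for
 * a one-off comparison, mirroring how the strict-build value check further down uses this
 * helper. No shadow association is recorded, as per the note above.
 */
#if 0
static uint32_t iemNativeExampleLoadGuestRegRaw(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Fetch the current guest RIP into the fixed temporary register. */
    return iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
}
#endif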
5360
5361
5362#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5363/**
5364 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5365 *
5366 * @returns New code buffer offset on success, UINT32_MAX on failure.
5367 * @param pReNative The recompiler state.
5368 * @param off The current code buffer position.
5369 * @param idxHstSimdReg The host register to load the guest register value into.
5370 * @param enmGstSimdReg The guest register to load.
5371 * @param enmLoadSz The load size of the register.
5372 *
5373 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5374 * that is something the caller needs to do if applicable.
5375 */
5376DECL_HIDDEN_THROW(uint32_t)
5377iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5378 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5379{
5380 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5381
5382 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5383 switch (enmLoadSz)
5384 {
5385 case kIemNativeGstSimdRegLdStSz_256:
5386 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5387 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5388 case kIemNativeGstSimdRegLdStSz_Low128:
5389 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5390 case kIemNativeGstSimdRegLdStSz_High128:
5391 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5392 default:
5393 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5394 }
5395}
5396#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5397
5398#ifdef VBOX_STRICT
5399
5400/**
5401 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5402 *
5403 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5404 * Trashes EFLAGS on AMD64.
5405 */
5406DECL_HIDDEN_THROW(uint32_t)
5407iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5408{
5409# ifdef RT_ARCH_AMD64
5410 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5411
5412 /* rol reg64, 32 */
5413 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5414 pbCodeBuf[off++] = 0xc1;
5415 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5416 pbCodeBuf[off++] = 32;
5417
5418 /* test reg32, ffffffffh */
5419 if (idxReg >= 8)
5420 pbCodeBuf[off++] = X86_OP_REX_B;
5421 pbCodeBuf[off++] = 0xf7;
5422 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5423 pbCodeBuf[off++] = 0xff;
5424 pbCodeBuf[off++] = 0xff;
5425 pbCodeBuf[off++] = 0xff;
5426 pbCodeBuf[off++] = 0xff;
5427
5428 /* je/jz +1 */
5429 pbCodeBuf[off++] = 0x74;
5430 pbCodeBuf[off++] = 0x01;
5431
5432 /* int3 */
5433 pbCodeBuf[off++] = 0xcc;
5434
5435 /* rol reg64, 32 */
5436 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5437 pbCodeBuf[off++] = 0xc1;
5438 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5439 pbCodeBuf[off++] = 32;
5440
5441# elif defined(RT_ARCH_ARM64)
5442 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5443 /* lsr tmp0, reg64, #32 */
5444 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5445 /* cbz tmp0, +1 */
5446 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5447 /* brk #0x1100 */
5448 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5449
5450# else
5451# error "Port me!"
5452# endif
5453 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5454 return off;
5455}
5456
5457
5458/**
5459 * Emits code that checks that the content of register @a idxReg is the same
5460 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5461 * instruction if that's not the case.
5462 *
5463 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5464 * Trashes EFLAGS on AMD64.
5465 */
5466DECL_HIDDEN_THROW(uint32_t)
5467iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5468{
5469#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5470 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5471 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5472 return off;
5473#endif
5474
5475# ifdef RT_ARCH_AMD64
5476 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5477
5478 /* cmp reg, [mem] */
5479 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5480 {
5481 if (idxReg >= 8)
5482 pbCodeBuf[off++] = X86_OP_REX_R;
5483 pbCodeBuf[off++] = 0x38;
5484 }
5485 else
5486 {
5487 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5488 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5489 else
5490 {
5491 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5492 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5493 else
5494 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5495 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5496 if (idxReg >= 8)
5497 pbCodeBuf[off++] = X86_OP_REX_R;
5498 }
5499 pbCodeBuf[off++] = 0x39;
5500 }
5501 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5502
5503 /* je/jz +1 */
5504 pbCodeBuf[off++] = 0x74;
5505 pbCodeBuf[off++] = 0x01;
5506
5507 /* int3 */
5508 pbCodeBuf[off++] = 0xcc;
5509
5510 /* For values smaller than the register size, we must check that the rest
5511 of the register is all zeros. */
5512 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5513 {
5514 /* test reg64, imm32 */
5515 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5516 pbCodeBuf[off++] = 0xf7;
5517 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5518 pbCodeBuf[off++] = 0;
5519 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5520 pbCodeBuf[off++] = 0xff;
5521 pbCodeBuf[off++] = 0xff;
5522
5523 /* je/jz +1 */
5524 pbCodeBuf[off++] = 0x74;
5525 pbCodeBuf[off++] = 0x01;
5526
5527 /* int3 */
5528 pbCodeBuf[off++] = 0xcc;
5529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5530 }
5531 else
5532 {
5533 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5534 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5535 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5536 }
5537
5538# elif defined(RT_ARCH_ARM64)
5539 /* mov TMP0, [gstreg] */
5540 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5541
5542 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5543 /* sub tmp0, tmp0, idxReg */
5544 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5545 /* cbz tmp0, +1 */
5546 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5547 /* brk #0x1000+enmGstReg */
5548 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5549 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5550
5551# else
5552# error "Port me!"
5553# endif
5554 return off;
5555}
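/*
 * A rough C sketch of the strict check emitted by the function above
 * (illustrative only; the names are made up: uHostReg is the shadow copy,
 * uGstValue the value at g_aGstShadowInfo[enmGstReg].off in CPUMCTX and
 * cbGstReg the guest register width in bytes):
 */
#if 0
static bool iemNativeSketchGuestRegShadowMatches(uint64_t uHostReg, uint64_t uGstValue, uint8_t cbGstReg)
{
    /* Compare the defined width of the guest register... */
    uint64_t const fMask = cbGstReg >= 8 ? UINT64_MAX : RT_BIT_64(cbGstReg * 8) - 1;
    if ((uHostReg & fMask) != (uGstValue & fMask))
        return false;   /* the emitted code hits int3 / brk #0x1000+enmGstReg here */
    /* ... and require the unused upper host register bits to be zero. */
    return (uHostReg & ~fMask) == 0;
}
#endif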
5556
5557
5558# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5559# ifdef RT_ARCH_AMD64
5560/**
5561 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
5562 */
5563DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5564{
5565 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5566 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5567 if (idxSimdReg >= 8)
5568 pbCodeBuf[off++] = X86_OP_REX_R;
5569 pbCodeBuf[off++] = 0x0f;
5570 pbCodeBuf[off++] = 0x38;
5571 pbCodeBuf[off++] = 0x29;
5572 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5573
5574 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5575 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5576 pbCodeBuf[off++] = X86_OP_REX_W
5577 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5578 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5579 pbCodeBuf[off++] = 0x0f;
5580 pbCodeBuf[off++] = 0x3a;
5581 pbCodeBuf[off++] = 0x16;
5582 pbCodeBuf[off++] = 0xeb;
5583 pbCodeBuf[off++] = 0x00;
5584
5585 /* cmp tmp0, 0xffffffffffffffff. */
5586 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5587 pbCodeBuf[off++] = 0x83;
5588 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5589 pbCodeBuf[off++] = 0xff;
5590
5591 /* je/jz +1 */
5592 pbCodeBuf[off++] = 0x74;
5593 pbCodeBuf[off++] = 0x01;
5594
5595 /* int3 */
5596 pbCodeBuf[off++] = 0xcc;
5597
5598 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5599 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5600 pbCodeBuf[off++] = X86_OP_REX_W
5601 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5602 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5603 pbCodeBuf[off++] = 0x0f;
5604 pbCodeBuf[off++] = 0x3a;
5605 pbCodeBuf[off++] = 0x16;
5606 pbCodeBuf[off++] = 0xeb;
5607 pbCodeBuf[off++] = 0x01;
5608
5609 /* cmp tmp0, 0xffffffffffffffff. */
5610 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5611 pbCodeBuf[off++] = 0x83;
5612 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5613 pbCodeBuf[off++] = 0xff;
5614
5615 /* je/jz +1 */
5616 pbCodeBuf[off++] = 0x74;
5617 pbCodeBuf[off++] = 0x01;
5618
5619 /* int3 */
5620 pbCodeBuf[off++] = 0xcc;
5621
5622 return off;
5623}
5624# endif
5625
5626
5627/**
5628 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5629 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5630 * instruction if that's not the case.
5631 *
5632 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5633 * Trashes EFLAGS on AMD64.
5634 */
5635DECL_HIDDEN_THROW(uint32_t)
5636iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5637 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5638{
5639 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5640 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5641 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5642 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5643 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5644 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5645 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5646 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5647 return off;
5648
5649# ifdef RT_ARCH_AMD64
5650 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5651 {
5652 /* movdqa vectmp0, idxSimdReg */
5653 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5654
5655 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5656
5657 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5658 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5659 }
5660
5661 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5662 {
5663 /* Due to the fact that CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5664 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5665
5666 /* vextracti128 vectmp0, idxSimdReg, 1 */
5667 pbCodeBuf[off++] = X86_OP_VEX3;
5668 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5669 | X86_OP_VEX3_BYTE1_X
5670 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5671 | 0x03; /* Opcode map */
5672 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5673 pbCodeBuf[off++] = 0x39;
5674 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5675 pbCodeBuf[off++] = 0x01;
5676
5677 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5678 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5679 }
5680# elif defined(RT_ARCH_ARM64)
5681 /* mov vectmp0, [gstreg] */
5682 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5683
5684 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5685 {
5686 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5687 /* eor vectmp0, vectmp0, idxSimdReg */
5688 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5689 /* uaddlv vectmp0, vectmp0.16B */
5690 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
5691 /* umov tmp0, vectmp0.H[0] */
5692 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5693 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5694 /* cbz tmp0, +1 */
5695 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5696 /* brk #0x1000+enmGstReg */
5697 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5698 }
5699
5700 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5701 {
5702 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5703 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
5704 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
5705 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
5706 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
5707 /* umov tmp0, (vectmp0 + 1).H[0] */
5708 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
5709 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5710 /* cbz tmp0, +1 */
5711 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5712 /* brk #0x1000+enmGstReg */
5713 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5714 }
5715
5716# else
5717# error "Port me!"
5718# endif
5719
5720 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5721 return off;
5722}
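/*
 * Conceptually the function above emits an exact 128-bit comparison of the
 * shadow copy against CPUMCTX for each half it is asked to check, roughly
 * (illustrative sketch, made-up helper name):
 */
#if 0
static bool iemNativeSketchGuestSimdRegShadowMatches(PCRTUINT128U pu128Host, PCRTUINT128U pu128Guest)
{
    /* AMD64 uses pcmpeqq + pextrq, ARM64 uses eor + uaddlv; a mismatch
       raises int3 / brk #0x1000+enmGstSimdReg. */
    return pu128Host->s.Lo == pu128Guest->s.Lo
        && pu128Host->s.Hi == pu128Guest->s.Hi;
}
#endif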
5723# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5724
5725
5726/**
5727 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5728 * important bits.
5729 *
5730 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5731 * Trashes EFLAGS on AMD64.
5732 */
5733DECL_HIDDEN_THROW(uint32_t)
5734iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5735{
5736 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5737 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5738 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5739 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5740
5741#ifdef RT_ARCH_AMD64
5742 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5743
5744 /* je/jz +1 */
5745 pbCodeBuf[off++] = 0x74;
5746 pbCodeBuf[off++] = 0x01;
5747
5748 /* int3 */
5749 pbCodeBuf[off++] = 0xcc;
5750
5751# elif defined(RT_ARCH_ARM64)
5752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5753
5754 /* b.eq +1 */
5755 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5756 /* brk #0x2000 */
5757 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5758
5759# else
5760# error "Port me!"
5761# endif
5762 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5763
5764 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5765 return off;
5766}
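/*
 * Minimal C sketch of the check emitted above (illustrative; fExecNow stands
 * for the value loaded from VMCPU::iem.s.fExec, fExecTb for the @a fExec the
 * TB was compiled for):
 */
#if 0
static bool iemNativeSketchExecFlagsMatch(uint32_t fExecNow, uint32_t fExecTb)
{
    /* Only the key bits are compared; a mismatch raises int3 / brk #0x2000. */
    return (fExecNow & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) == (fExecTb & IEMTB_F_KEY_MASK);
}
#endif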
5767
5768#endif /* VBOX_STRICT */
5769
5770
5771#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5772/**
5773 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
5774 */
5775DECL_HIDDEN_THROW(uint32_t)
5776iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
5777{
5778 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
5779
5780 fEflNeeded &= X86_EFL_STATUS_BITS;
5781 if (fEflNeeded)
5782 {
5783# ifdef RT_ARCH_AMD64
5784 /* test dword [pVCpu + offVCpu], imm32 */
5785 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5786 if (fEflNeeded <= 0xff)
5787 {
5788 pCodeBuf[off++] = 0xf6;
5789 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5790 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5791 }
5792 else
5793 {
5794 pCodeBuf[off++] = 0xf7;
5795 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5796 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5797 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
5798 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
5799 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
5800 }
5801 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5802
5803# else
5804 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5805 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
5806 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
5807# ifdef RT_ARCH_ARM64
5808 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
5809 off = iemNativeEmitBrk(pReNative, off, 0x7777);
5810# else
5811# error "Port me!"
5812# endif
5813 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5814# endif
5815 }
5816 return off;
5817}
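/*
 * Illustrative sketch of what the emitted check amounts to (made-up helper
 * name; fSkippingEFlags stands for the value at VMCPU::iem.s.fSkippingEFlags):
 */
#if 0
static bool iemNativeSketchEFlagsSkippingOk(uint32_t fSkippingEFlags, uint32_t fEflNeeded)
{
    /* Trap (int3 / brk #0x7777) if any needed status flag is marked as skipped. */
    return (fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS) == 0;
}
#endif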
5818#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
5819
5820
5821/**
5822 * Emits code for checking the return code of a call and rcPassUp, returning
5823 * from the code if either is non-zero.
5824 */
5825DECL_HIDDEN_THROW(uint32_t)
5826iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5827{
5828#ifdef RT_ARCH_AMD64
5829 /*
5830 * AMD64: eax = call status code.
5831 */
5832
5833 /* edx = rcPassUp */
5834 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5835# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5836 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5837# endif
5838
5839 /* edx = eax | rcPassUp */
5840 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5841 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5842 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5844
5845 /* Jump to non-zero status return path. */
5846 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5847
5848 /* done. */
5849
5850#elif RT_ARCH_ARM64
5851 /*
5852 * ARM64: w0 = call status code.
5853 */
5854# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5855 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5856# endif
5857 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5858
5859 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5860
5861 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5862
5863 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5864 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5865 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5866
5867#else
5868# error "port me"
5869#endif
5870 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5871 RT_NOREF_PV(idxInstr);
5872 return off;
5873}
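/*
 * The generated code boils down to the following test (illustrative sketch;
 * rcCall is the status code returned by the call, rcPassUp is read from
 * VMCPU::iem.s.rcPassUp):
 */
#if 0
static bool iemNativeSketchCallRetOk(int32_t rcCall, int32_t rcPassUp)
{
    /* Both are zero (VINF_SUCCESS) in the common case, so ORing them yields
       a single branch to the NonZeroRetOrPassUp path when either is set. */
    return (rcCall | rcPassUp) == 0;
}
#endif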
5874
5875
5876/**
5877 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5878 * raising a \#GP(0) if it isn't.
5879 *
5880 * @returns New code buffer offset, UINT32_MAX on failure.
5881 * @param pReNative The native recompile state.
5882 * @param off The code buffer offset.
5883 * @param idxAddrReg The host register with the address to check.
5884 * @param idxInstr The current instruction.
5885 */
5886DECL_HIDDEN_THROW(uint32_t)
5887iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5888{
5889 /*
5890 * Make sure we don't have any outstanding guest register writes as we may
5891 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5892 */
5893 off = iemNativeRegFlushPendingWrites(pReNative, off);
5894
5895#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5896 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5897#else
5898 RT_NOREF(idxInstr);
5899#endif
5900
5901#ifdef RT_ARCH_AMD64
5902 /*
5903 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5904 * return raisexcpt();
5905 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5906 */
5907 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5908
5909 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5910 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5911 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5912 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5913 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5914
5915 iemNativeRegFreeTmp(pReNative, iTmpReg);
5916
5917#elif defined(RT_ARCH_ARM64)
5918 /*
5919 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5920 * return raisexcpt();
5921 * ----
5922 * mov x1, 0x800000000000
5923 * add x1, x0, x1
5924 * cmp xzr, x1, lsr 48
5925 * b.ne .Lraisexcpt
5926 */
5927 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5928
5929 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5930 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5931 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5932 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5933
5934 iemNativeRegFreeTmp(pReNative, iTmpReg);
5935
5936#else
5937# error "Port me"
5938#endif
5939 return off;
5940}
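/*
 * Worked example for the branch-free canonicality test used above
 * (illustrative sketch): for a canonical 48-bit address the bits 63:47 are
 * all equal, so the high 32 bits are either 0x00000000..0x00007fff or
 * 0xffff8000..0xffffffff. Adding 0x8000 with 32-bit wrap-around maps both
 * ranges into 0x00000000..0x0000ffff, making the final >> 16 yield zero.
 */
#if 0
static bool iemNativeSketchIsCanonical(uint64_t uAddr)
{
    /* E.g. 0x00007fffffffffff: (0x00007fff + 0x8000) >> 16 == 0 -> canonical;
            0x0000800000000000: (0x00008000 + 0x8000) >> 16 == 1 -> #GP(0). */
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}
#endif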
5941
5942
5943/**
5944 * Emits code to check that the content of @a idxAddrReg is within the limit
5945 * of CS, raising a \#GP(0) if it isn't.
5946 *
5947 * @returns New code buffer offset; throws VBox status code on error.
5948 * @param pReNative The native recompile state.
5949 * @param off The code buffer offset.
5950 * @param idxAddrReg The host register (32-bit) with the address to
5951 * check.
5952 * @param idxInstr The current instruction.
5953 */
5954DECL_HIDDEN_THROW(uint32_t)
5955iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5956 uint8_t idxAddrReg, uint8_t idxInstr)
5957{
5958 /*
5959 * Make sure we don't have any outstanding guest register writes as we may
5960 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5961 */
5962 off = iemNativeRegFlushPendingWrites(pReNative, off);
5963
5964#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5965 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5966#else
5967 RT_NOREF(idxInstr);
5968#endif
5969
5970 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5971 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5972 kIemNativeGstRegUse_ReadOnly);
5973
5974 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5975 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5976
5977 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5978 return off;
5979}
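/*
 * Minimal sketch of the emitted check (illustrative; uAddr32 is the 32-bit
 * address in @a idxAddrReg, uCsLimit the CS limit fetched via the guest
 * shadow register):
 */
#if 0
static bool iemNativeSketchWithinCsLimit(uint32_t uAddr32, uint32_t uCsLimit)
{
    /* cmp + ja: anything strictly above the limit branches to RaiseGp0. */
    return uAddr32 <= uCsLimit;
}
#endif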
5980
5981
5982/**
5983 * Emits a call to a CImpl function or something similar.
5984 */
5985DECL_HIDDEN_THROW(uint32_t)
5986iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5987 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5988{
5989 /* Writeback everything. */
5990 off = iemNativeRegFlushPendingWrites(pReNative, off);
5991
5992 /*
5993 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5994 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5995 */
5996 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5997 fGstShwFlush
5998 | RT_BIT_64(kIemNativeGstReg_Pc)
5999 | RT_BIT_64(kIemNativeGstReg_EFlags));
6000 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6001
6002 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6003
6004 /*
6005 * Load the parameters.
6006 */
6007#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6008 /* Special handling for the hidden VBOXSTRICTRC pointer. */
6009 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6010 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6011 if (cAddParams > 0)
6012 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6013 if (cAddParams > 1)
6014 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6015 if (cAddParams > 2)
6016 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6017 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6018
6019#else
6020 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6021 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6022 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6023 if (cAddParams > 0)
6024 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6025 if (cAddParams > 1)
6026 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6027 if (cAddParams > 2)
6028# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6029 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6030# else
6031 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6032# endif
6033#endif
6034
6035 /*
6036 * Make the call.
6037 */
6038 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6039
6040#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6041 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6042#endif
6043
6044 /*
6045 * Check the status code.
6046 */
6047 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6048}
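/*
 * In plain C the sequence generated by the function above corresponds
 * roughly to the following (illustrative sketch; the exact pfnCImpl
 * prototype depends on cAddParams and the host calling convention):
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr [, uParam0 [, uParam1 [, uParam2]]]);
 *      if (rcStrict != 0 || pVCpu->iem.s.rcPassUp != 0)
 *          goto NonZeroRetOrPassUp;
 */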
6049
6050
6051/**
6052 * Emits a call to a threaded worker function.
6053 */
6054DECL_HIDDEN_THROW(uint32_t)
6055iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6056{
6057 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6058
6059 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6060 off = iemNativeRegFlushPendingWrites(pReNative, off);
6061
6062 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6063 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6064
6065#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6066 /* The threaded function may throw / long jmp, so set current instruction
6067 number if we're counting. */
6068 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6069#endif
6070
6071 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6072
6073#ifdef RT_ARCH_AMD64
6074 /* Load the parameters and emit the call. */
6075# ifdef RT_OS_WINDOWS
6076# ifndef VBOXSTRICTRC_STRICT_ENABLED
6077 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6078 if (cParams > 0)
6079 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6080 if (cParams > 1)
6081 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6082 if (cParams > 2)
6083 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6084# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6085 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6086 if (cParams > 0)
6087 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6088 if (cParams > 1)
6089 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6090 if (cParams > 2)
6091 {
6092 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6093 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6094 }
6095 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6096# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6097# else
6098 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6099 if (cParams > 0)
6100 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6101 if (cParams > 1)
6102 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6103 if (cParams > 2)
6104 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6105# endif
6106
6107 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6108
6109# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6110 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6111# endif
6112
6113#elif RT_ARCH_ARM64
6114 /*
6115 * ARM64:
6116 */
6117 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6118 if (cParams > 0)
6119 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6120 if (cParams > 1)
6121 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6122 if (cParams > 2)
6123 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6124
6125 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6126
6127#else
6128# error "port me"
6129#endif
6130
6131 /*
6132 * Check the status code.
6133 */
6134 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6135
6136 return off;
6137}
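/*
 * Conceptually the code emitted above performs (illustrative sketch; the
 * number of parameters actually loaded is given by
 * g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction]):
 *
 *      rcStrict = g_apfnIemThreadedFunctions[enmFunction](pVCpu, auParams[0], auParams[1], auParams[2]);
 *      // followed by the shared rc / rcPassUp check, see iemNativeEmitCheckCallRetAndPassUp
 */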
6138
6139#ifdef VBOX_WITH_STATISTICS
6140/**
6141 * Emits code to update the thread call statistics.
6142 */
6143DECL_INLINE_THROW(uint32_t)
6144iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6145{
6146 /*
6147 * Update threaded function stats.
6148 */
6149 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6150 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6151# if defined(RT_ARCH_ARM64)
6152 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6153 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6154 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6155 iemNativeRegFreeTmp(pReNative, idxTmp1);
6156 iemNativeRegFreeTmp(pReNative, idxTmp2);
6157# else
6158 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6159# endif
6160 return off;
6161}
6162#endif /* VBOX_WITH_STATISTICS */
6163
6164
6165/**
6166 * Emits the code at the ReturnWithFlags label (returns
6167 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6168 */
6169static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6170{
6171 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6172 if (idxLabel != UINT32_MAX)
6173 {
6174 iemNativeLabelDefine(pReNative, idxLabel, off);
6175
6176 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6177
6178 /* jump back to the return sequence. */
6179 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6180 }
6181 return off;
6182}
6183
6184
6185/**
6186 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6187 */
6188static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6189{
6190 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6191 if (idxLabel != UINT32_MAX)
6192 {
6193 iemNativeLabelDefine(pReNative, idxLabel, off);
6194
6195 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6196
6197 /* jump back to the return sequence. */
6198 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6199 }
6200 return off;
6201}
6202
6203
6204/**
6205 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6206 */
6207static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6208{
6209 /*
6210 * Generate the rc + rcPassUp fiddling code if needed.
6211 */
6212 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6213 if (idxLabel != UINT32_MAX)
6214 {
6215 iemNativeLabelDefine(pReNative, idxLabel, off);
6216
6217 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6218#ifdef RT_ARCH_AMD64
6219# ifdef RT_OS_WINDOWS
6220# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6221 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6222# endif
6223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6224 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6225# else
6226 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6227 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6228# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6229 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6230# endif
6231# endif
6232# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6233 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6234# endif
6235
6236#else
6237 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6238 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6239 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6240#endif
6241
6242 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6243 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6244 }
6245 return off;
6246}
6247
6248
6249/**
6250 * Emits a standard epilog.
6251 */
6252static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6253{
6254 *pidxReturnLabel = UINT32_MAX;
6255
6256 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6257 off = iemNativeRegFlushPendingWrites(pReNative, off);
6258
6259 /*
6260 * Successful return, so clear the return register (eax, w0).
6261 */
6262 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
6263
6264 /*
6265 * Define label for common return point.
6266 */
6267 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6268 *pidxReturnLabel = idxReturn;
6269
6270 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6271
6272 /*
6273 * Restore registers and return.
6274 */
6275#ifdef RT_ARCH_AMD64
6276 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6277
6278 /* Reposition esp at the r15 restore point. */
6279 pbCodeBuf[off++] = X86_OP_REX_W;
6280 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6282 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6283
6284 /* Pop non-volatile registers and return */
6285 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6286 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6287 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6288 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6289 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6290 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6291 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6292 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6293# ifdef RT_OS_WINDOWS
6294 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6295 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6296# endif
6297 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6298 pbCodeBuf[off++] = 0xc9; /* leave */
6299 pbCodeBuf[off++] = 0xc3; /* ret */
6300 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6301
6302#elif RT_ARCH_ARM64
6303 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6304
6305 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6306 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6307 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6308 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6309 IEMNATIVE_FRAME_VAR_SIZE / 8);
6310 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6311 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6312 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6313 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6314 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6315 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6316 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6317 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6318 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6319 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6320 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6321 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6322
6323 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6324 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6325 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6326 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6327
6328 /* retab / ret */
6329# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6330 if (1)
6331 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6332 else
6333# endif
6334 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6335
6336#else
6337# error "port me"
6338#endif
6339 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6340
6341 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6342}
6343
6344
6345/**
6346 * Emits a standard prolog.
6347 */
6348static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6349{
6350#ifdef RT_ARCH_AMD64
6351 /*
6352 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6353 * reserving 64 bytes for stack variables plus 4 non-register argument
6354 * slots. Fixed register assignment: xBX = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU);
6355 *
6356 * Since we always do the same register spilling, we can use the same
6357 * unwind description for all the code.
6358 */
6359 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6360 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6361 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6362 pbCodeBuf[off++] = 0x8b;
6363 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6364 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6365 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6366# ifdef RT_OS_WINDOWS
6367 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6368 pbCodeBuf[off++] = 0x8b;
6369 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6370 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6371 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6372# else
6373 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6374 pbCodeBuf[off++] = 0x8b;
6375 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6376# endif
6377 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6378 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6379 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6380 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6381 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6382 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6383 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6384 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6385
6386# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6387 /* Save the frame pointer. */
6388 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6389# endif
6390
6391 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6392 X86_GREG_xSP,
6393 IEMNATIVE_FRAME_ALIGN_SIZE
6394 + IEMNATIVE_FRAME_VAR_SIZE
6395 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6396 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6397 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6398 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6399 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6400
6401#elif RT_ARCH_ARM64
6402 /*
6403 * We set up a stack frame exactly like on x86, only we have to push the
6404 * return address ourselves here. We save all non-volatile registers.
6405 */
6406 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6407
6408# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6409 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6410 * definitely the dwarf stepping code, but so far it's been very tedious to figure out whether it's
6411 * in any way conditional, so just emitting this instruction now and hoping for the best... */
6412 /* pacibsp */
6413 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6414# endif
6415
6416 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6417 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6418 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6419 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6420 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6421 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6422 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6423 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6424 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6425 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6426 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6427 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6428 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6429 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6430 /* Save the BP and LR (ret address) registers at the top of the frame. */
6431 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6432 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6433 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6434 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6435 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6436 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6437
6438 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6439 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6440
6441 /* mov r28, r0 */
6442 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6443 /* mov r27, r1 */
6444 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6445
6446# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6447 /* Save the frame pointer. */
6448 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6449 ARMV8_A64_REG_X2);
6450# endif
6451
6452#else
6453# error "port me"
6454#endif
6455 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6456 return off;
6457}
6458
6459
6460/*********************************************************************************************************************************
6461* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6462*********************************************************************************************************************************/
6463
6464/**
6465 * Internal work that allocates a variable with kind set to
6466 * kIemNativeVarKind_Invalid and no current stack allocation.
6467 *
6468 * The kind will either be set by the caller or later when the variable is first
6469 * assigned a value.
6470 *
6471 * @returns Unpacked index.
6472 * @internal
6473 */
6474static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6475{
6476 Assert(cbType > 0 && cbType <= 64);
6477 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6478 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6479 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6480 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6481 pReNative->Core.aVars[idxVar].cbVar = cbType;
6482 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6483 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6484 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6485 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6486 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6487 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6488 pReNative->Core.aVars[idxVar].u.uValue = 0;
6489#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6490 pReNative->Core.aVars[idxVar].fSimdReg = false;
6491#endif
6492 return idxVar;
6493}
6494
6495
6496/**
6497 * Internal work that allocates an argument variable w/o setting enmKind.
6498 *
6499 * @returns Unpacked index.
6500 * @internal
6501 */
6502static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6503{
6504 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6505 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6506 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6507
6508 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6509 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6510 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6511 return idxVar;
6512}
6513
6514
6515/**
6516 * Gets the stack slot for a stack variable, allocating one if necessary.
6517 *
6518 * Calling this function implies that the stack slot will contain a valid
6519 * variable value. The caller deals with any register currently assigned to the
6520 * variable, typically by spilling it into the stack slot.
6521 *
6522 * @returns The stack slot number.
6523 * @param pReNative The recompiler state.
6524 * @param idxVar The variable.
6525 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6526 */
6527DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6528{
6529 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6530 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6531 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6532
6533 /* Already got a slot? */
6534 uint8_t const idxStackSlot = pVar->idxStackSlot;
6535 if (idxStackSlot != UINT8_MAX)
6536 {
6537 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6538 return idxStackSlot;
6539 }
6540
6541 /*
6542 * A single slot is easy to allocate.
6543 * Allocate them from the top end, closest to BP, to reduce the displacement.
6544 */
6545 if (pVar->cbVar <= sizeof(uint64_t))
6546 {
6547 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6548 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6549 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6550 pVar->idxStackSlot = (uint8_t)iSlot;
6551 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6552 return (uint8_t)iSlot;
6553 }
6554
6555 /*
6556 * We need more than one stack slot.
6557 *
6558 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6559 */
6560 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6561 Assert(pVar->cbVar <= 64);
6562 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6563 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6564 uint32_t bmStack = pReNative->Core.bmStack;
6565 while (bmStack != UINT32_MAX)
6566 {
6567 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6568 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6569 iSlot = (iSlot - 1) & ~fBitAlignMask;
6570 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6571 {
6572 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6573 pVar->idxStackSlot = (uint8_t)iSlot;
6574 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6575 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6576 return (uint8_t)iSlot;
6577 }
6578
6579 bmStack |= (fBitAllocMask << iSlot);
6580 }
6581 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6582}
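/*
 * Worked example for the multi-slot case above (illustrative): a 32 byte
 * variable needs four 8-byte slots aligned on a four slot boundary, i.e.
 * fBitAlignMask = 3 and fBitAllocMask = 0xf:
 */
#if 0
static void iemNativeSketchStackSlotMasks(void)
{
    uint8_t const  cbVar         = 32;
    uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(cbVar) - 4) - 1; /* RT_BIT_32(6 - 4) - 1 = 3 */
    uint32_t const fBitAllocMask = RT_BIT_32((cbVar + 7) >> 3) - 1;            /* RT_BIT_32(4) - 1     = 0xf */
    Assert(fBitAlignMask == 3 && fBitAllocMask == 0xf);
}
#endif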
6583
6584
6585/**
6586 * Changes the variable to a stack variable.
6587 *
6588 * Currently this is only possible to do the first time the variable is used;
6589 * switching later can be implemented but isn't done.
6590 *
6591 * @param pReNative The recompiler state.
6592 * @param idxVar The variable.
6593 * @throws VERR_IEM_VAR_IPE_2
6594 */
6595DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6596{
6597 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6598 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6599 if (pVar->enmKind != kIemNativeVarKind_Stack)
6600 {
6601 /* We could in theory transition from immediate to stack as well, but it
6602 would involve the caller doing work storing the value on the stack. So,
6603 till that's required we only allow transition from invalid. */
6604 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6605 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6606 pVar->enmKind = kIemNativeVarKind_Stack;
6607
6608 /* Note! We don't allocate a stack slot here, that's only done when a
6609 slot is actually needed to hold a variable value. */
6610 }
6611}
6612
6613
6614/**
6615 * Sets it to a variable with a constant value.
6616 *
6617 * This does not require stack storage as we know the value and can always
6618 * reload it, unless of course it's referenced.
6619 *
6620 * @param pReNative The recompiler state.
6621 * @param idxVar The variable.
6622 * @param uValue The immediate value.
6623 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6624 */
6625DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6626{
6627 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6628 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6629 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6630 {
6631 /* Only simple transitions for now. */
6632 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6633 pVar->enmKind = kIemNativeVarKind_Immediate;
6634 }
6635 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6636
6637 pVar->u.uValue = uValue;
6638 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
6639 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
6640 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
6641}
6642
6643
6644/**
6645 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6646 *
6647 * This does not require stack storage as we know the value and can always
6648 * reload it. Loading is postponed till needed.
6649 *
6650 * @param pReNative The recompiler state.
6651 * @param idxVar The variable. Unpacked.
6652 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
6653 *
6654 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6655 * @internal
6656 */
6657static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6658{
6659 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6660 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6661
6662 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6663 {
6664 /* Only simple transitions for now. */
6665 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6666 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6667 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6668 }
6669 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6670
6671 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
6672
6673 /* Update the other variable, ensure it's a stack variable. */
6674 /** @todo handle variables with const values... that'll go boom now. */
6675 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6676 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
6677}
6678
6679
6680/**
6681 * Sets the variable to a reference (pointer) to a guest register reference.
6682 *
6683 * This does not require stack storage as we know the value and can always
6684 * reload it. Loading is postponed till needed.
6685 *
6686 * @param pReNative The recompiler state.
6687 * @param idxVar The variable.
6688 * @param enmRegClass The class guest registers to reference.
6689 * @param idxReg The register within @a enmRegClass to reference.
6690 *
6691 * @throws VERR_IEM_VAR_IPE_2
6692 */
6693DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6694 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6695{
6696 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6697 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6698
6699 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
6700 {
6701 /* Only simple transitions for now. */
6702 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6703 pVar->enmKind = kIemNativeVarKind_GstRegRef;
6704 }
6705 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6706
6707 pVar->u.GstRegRef.enmClass = enmRegClass;
6708 pVar->u.GstRegRef.idx = idxReg;
6709}
6710
6711
6712DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6713{
6714 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6715}
6716
6717
6718DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6719{
6720 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6721
6722 /* Since we're using a generic uint64_t value type, we must truncate it if
6723 the variable is smaller, otherwise we may end up with too large a value when
6724 scaling up an imm8 w/ sign-extension.
6725
6726 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6727 in the bios, bx=1) when running on arm, because clang expects 16-bit
6728 register parameters to have bits 16 and up set to zero. Instead of
6729 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
6730 CF value in the result. */
6731 switch (cbType)
6732 {
6733 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6734 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6735 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6736 }
6737 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6738 return idxVar;
6739}
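/*
 * Illustrative example of the truncation above: a constant that was
 * sign-extended to 64 bits (e.g. the 0xffff from "add bx, 0xffff") must be
 * masked down to the argument width so the host compiler's expectations for
 * narrow register parameters are met:
 */
#if 0
static void iemNativeSketchArgConstTruncation(void)
{
    uint64_t uValue = UINT64_C(0xffffffffffffffff); /* 0xffff sign-extended to 64 bits */
    uValue &= UINT64_C(0xffff);                     /* cbType == sizeof(uint16_t) */
    Assert(uValue == UINT64_C(0xffff));             /* what a 16-bit argument register must carry */
}
#endif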
6740
6741
6742DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6743{
6744 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
6745 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
6746 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6747 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6748 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6749 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6750
6751 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6752 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
6753 return idxArgVar;
6754}
6755
6756
6757DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6758{
6759 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6760 /* Don't set to stack now, leave that to the first use as for instance
6761 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6762 return idxVar;
6763}
6764
6765
6766DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6767{
6768 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6769
6770 /* Since we're using a generic uint64_t value type, we must truncate it if
6771 the variable is smaller, otherwise we may end up with too large a value when
6772 scaling up an imm8 w/ sign-extension. */
6773 switch (cbType)
6774 {
6775 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6776 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6777 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6778 }
6779 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6780 return idxVar;
6781}
6782
6783
6784DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
6785{
6786 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6787 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6788
6789 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
6790 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
6791
6792 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
6793
6794 /* Truncate the value to this variable's size. */
6795 switch (cbType)
6796 {
6797 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
6798 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
6799 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
6800 }
6801
6802 iemNativeVarRegisterRelease(pReNative, idxVarOther);
6803 iemNativeVarRegisterRelease(pReNative, idxVar);
6804 return idxVar;
6805}
6806
6807
6808/**
6809 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6810 * fixed till we call iemNativeVarRegisterRelease.
6811 *
6812 * @returns The host register number.
6813 * @param pReNative The recompiler state.
6814 * @param idxVar The variable.
6815 * @param poff Pointer to the instruction buffer offset.
6816 * In case a register needs to be freed up or the value
6817 * loaded off the stack.
6818 * @param fInitialized Set if the variable must already have been initialized.
6819 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6820 * the case.
6821 * @param idxRegPref Preferred register number or UINT8_MAX.
6822 */
6823DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6824 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6825{
6826 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6827 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6828 Assert(pVar->cbVar <= 8);
6829 Assert(!pVar->fRegAcquired);
6830
6831 uint8_t idxReg = pVar->idxReg;
6832 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6833 {
6834 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6835 && pVar->enmKind < kIemNativeVarKind_End);
6836 pVar->fRegAcquired = true;
6837 return idxReg;
6838 }
6839
6840 /*
6841 * If the kind of variable has not yet been set, default to 'stack'.
6842 */
6843 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6844 && pVar->enmKind < kIemNativeVarKind_End);
6845 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6846 iemNativeVarSetKindToStack(pReNative, idxVar);
6847
6848 /*
6849 * We have to allocate a register for the variable, even if it's a stack one,
6850 * as we don't know if there are modifications being made to it before it's
6851 * finalized (todo: analyze and insert hints about that?).
6852 *
6853 * If we can, we try to get the correct register for argument variables. This
6854 * assumes that most argument variables are fetched as close as possible
6855 * to the actual call, so that there aren't any interfering hidden calls
6856 * (memory accesses, etc) in between.
6857 *
6858 * If we cannot, or it's not an argument variable, we make sure no argument
6859 * registers that will be used by this MC block are allocated here, and we
6860 * always prefer non-volatile registers to avoid having to spill stuff for
6861 * internal calls.
6862 */
6863 /** @todo Detect too early argument value fetches and warn about hidden
6864 * calls causing less optimal code to be generated in the python script. */
6865
6866 uint8_t const uArgNo = pVar->uArgNo;
6867 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6868 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6869 {
6870 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6871
6872#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6873 /* Writeback any dirty shadow registers we are about to unshadow. */
6874 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
6875#endif
6876
6877 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6878 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6879 }
6880 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6881 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6882 {
6883 /** @todo there must be a better way for this and boot cArgsX? */
6884 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6885 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6886 & ~pReNative->Core.bmHstRegsWithGstShadow
6887 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6888 & fNotArgsMask;
6889 if (fRegs)
6890 {
6891 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6892 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6893 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6894 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6895 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6896 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6897 }
6898 else
6899 {
6900 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6901 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6902 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6903 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6904 }
6905 }
6906 else
6907 {
6908 idxReg = idxRegPref;
6909 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6910 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
6911 }
6912 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6913 pVar->idxReg = idxReg;
6914
6915#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6916 pVar->fSimdReg = false;
6917#endif
6918
6919 /*
6920 * Load it off the stack if we've got a stack slot.
6921 */
6922 uint8_t const idxStackSlot = pVar->idxStackSlot;
6923 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6924 {
6925 Assert(fInitialized);
6926 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6927 switch (pVar->cbVar)
6928 {
6929 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6930 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6931 case 3: AssertFailed(); RT_FALL_THRU();
6932 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6933 default: AssertFailed(); RT_FALL_THRU();
6934 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6935 }
6936 }
6937 else
6938 {
6939 Assert(idxStackSlot == UINT8_MAX);
6940 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6941 }
6942 pVar->fRegAcquired = true;
6943 return idxReg;
6944}
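
/*
 * Typical usage sketch (illustrative only; mirrors iemNativeVarAllocAssign above).
 * Acquiring pins the variable to a host register until the matching release, so
 * code can be emitted against a stable register number:
 *
 *      uint8_t const idxSrcReg = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true);
 *      uint8_t const idxDstReg = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxDstReg, idxSrcReg);
 *      iemNativeVarRegisterRelease(pReNative, idxVarSrc);
 *      iemNativeVarRegisterRelease(pReNative, idxVarDst);
 *
 * The 'true' passed to the first acquire is the fInitialized flag, requiring the
 * source variable to already have a value.
 */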
6945
6946
6947#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6948/**
6949 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
6950 * fixed till we call iemNativeVarRegisterRelease.
6951 *
6952 * @returns The host register number.
6953 * @param pReNative The recompiler state.
6954 * @param idxVar The variable.
6955 * @param poff Pointer to the instruction buffer offset; updated in
6956 * case a register needs to be freed up or the value
6957 * needs to be loaded off the stack.
6958 * @param fInitialized Set if the variable must already have been initialized.
6959 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6960 * the case.
6961 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
6962 */
6963DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6964 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6965{
6966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6967 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6968 Assert( pVar->cbVar == sizeof(RTUINT128U)
6969 || pVar->cbVar == sizeof(RTUINT256U));
6970 Assert(!pVar->fRegAcquired);
6971
6972 uint8_t idxReg = pVar->idxReg;
6973 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
6974 {
6975 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6976 && pVar->enmKind < kIemNativeVarKind_End);
6977 pVar->fRegAcquired = true;
6978 return idxReg;
6979 }
6980
6981 /*
6982 * If the kind of variable has not yet been set, default to 'stack'.
6983 */
6984 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6985 && pVar->enmKind < kIemNativeVarKind_End);
6986 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6987 iemNativeVarSetKindToStack(pReNative, idxVar);
6988
6989 /*
6990 * We have to allocate a register for the variable, even if it's a stack one,
6991 * as we don't know if there are modifications being made to it before it's
6992 * finalized (todo: analyze and insert hints about that?).
6993 *
6994 * If we can, we try to get the correct register for argument variables. This
6995 * assumes that most argument variables are fetched as close as possible
6996 * to the actual call, so that there aren't any interfering hidden calls
6997 * (memory accesses, etc) in between.
6998 *
6999 * If we cannot, or it's not an argument variable, we make sure no argument
7000 * registers that will be used by this MC block are allocated here, and we
7001 * always prefer non-volatile registers to avoid having to spill stuff for
7002 * internal calls.
7003 */
7004 /** @todo Detect too early argument value fetches and warn about hidden
7005 * calls causing less optimal code to be generated in the python script. */
7006
7007 uint8_t const uArgNo = pVar->uArgNo;
7008 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7009
7010 /* SIMD is a bit simpler for now because there is no support for arguments. */
7011 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7012 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7013 {
7014 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7015 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7016 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7017 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7018 & fNotArgsMask;
7019 if (fRegs)
7020 {
7021 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7022 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7023 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7024 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7025 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7026 }
7027 else
7028 {
7029 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7030 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7031 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7032 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7033 }
7034 }
7035 else
7036 {
7037 idxReg = idxRegPref;
7038 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7039 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7040 }
7041 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7042
7043 pVar->fSimdReg = true;
7044 pVar->idxReg = idxReg;
7045
7046 /*
7047 * Load it off the stack if we've got a stack slot.
7048 */
7049 uint8_t const idxStackSlot = pVar->idxStackSlot;
7050 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7051 {
7052 Assert(fInitialized);
7053 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7054 switch (pVar->cbVar)
7055 {
7056 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7057 default: AssertFailed(); RT_FALL_THRU();
7058 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7059 }
7060 }
7061 else
7062 {
7063 Assert(idxStackSlot == UINT8_MAX);
7064 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7065 }
7066 pVar->fRegAcquired = true;
7067 return idxReg;
7068}
7069#endif
7070
7071
7072/**
7073 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7074 * guest register.
7075 *
7076 * This function makes sure there is a register for it and sets it to be the
7077 * current shadow copy of @a enmGstReg.
7078 *
7079 * @returns The host register number.
7080 * @param pReNative The recompiler state.
7081 * @param idxVar The variable.
7082 * @param enmGstReg The guest register this variable will be written to
7083 * after this call.
7084 * @param poff Pointer to the instruction buffer offset; updated in
7085 * case a register needs to be freed up or if the
7086 * variable content needs to be loaded off the stack.
7087 *
7088 * @note We DO NOT expect @a idxVar to be an argument variable,
7089 * because this function can only be used in the commit stage of
7090 * an instruction.
7091 */
7092DECL_HIDDEN_THROW(uint8_t)
7093iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7094{
7095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7096 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7097 Assert(!pVar->fRegAcquired);
7098 AssertMsgStmt( pVar->cbVar <= 8
7099 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7100 || pVar->enmKind == kIemNativeVarKind_Stack),
7101 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7102 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7103 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7104
7105 /*
7106 * This shouldn't ever be used for arguments, unless it's in a weird else
7107 * branch that doesn't do any calling and even then it's questionable.
7108 *
7109 * However, in case someone writes crazy wrong MC code and does register
7110 * updates before making calls, just use the regular register allocator to
7111 * ensure we get a register suitable for the intended argument number.
7112 */
7113 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7114
7115 /*
7116 * If there is already a register for the variable, we transfer/set the
7117 * guest shadow copy assignment to it.
7118 */
7119 uint8_t idxReg = pVar->idxReg;
7120 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7121 {
7122#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7123 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7124 {
7125# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7126 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7127 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7128# endif
7129 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7130 }
7131#endif
7132
7133 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7134 {
7135 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7136 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7137 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7138 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7139 }
7140 else
7141 {
7142 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7143 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7144 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7145 }
7146 /** @todo figure this one out. We need some way of making sure the register isn't
7147 * modified after this point, just in case we start writing crappy MC code. */
7148 pVar->enmGstReg = enmGstReg;
7149 pVar->fRegAcquired = true;
7150 return idxReg;
7151 }
7152 Assert(pVar->uArgNo == UINT8_MAX);
7153
7154 /*
7155 * Because this is supposed to be the commit stage, we just tag along with the
7156 * temporary register allocator and upgrade the register to a variable register.
7157 */
7158 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7159 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7160 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7161 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7162 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7163 pVar->idxReg = idxReg;
7164
7165 /*
7166 * Now we need to load the register value.
7167 */
7168 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7169 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7170 else
7171 {
7172 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7173 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7174 switch (pVar->cbVar)
7175 {
7176 case sizeof(uint64_t):
7177 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7178 break;
7179 case sizeof(uint32_t):
7180 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7181 break;
7182 case sizeof(uint16_t):
7183 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7184 break;
7185 case sizeof(uint8_t):
7186 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7187 break;
7188 default:
7189 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7190 }
7191 }
7192
7193 pVar->fRegAcquired = true;
7194 return idxReg;
7195}
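
/*
 * Commit-stage usage sketch (illustrative only): a full write of a variable to a
 * guest register acquires the host register via the function above, which also
 * makes it the current shadow of that guest register, and then releases it again.
 * The actual store into CPUMCTX (or, with delayed writeback, the dirty marking)
 * happens in between:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarValue,
 *                                                                       enmGstReg, &off);
 *      ... emit the store to the guest context here, or leave it to delayed writeback ...
 *      iemNativeVarRegisterRelease(pReNative, idxVarValue);
 */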
7196
7197
7198/**
7199 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7200 *
7201 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7202 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7203 * requirement of flushing anything in volatile host registers when making a
7204 * call.
7205 *
7206 * @returns New @a off value.
7207 * @param pReNative The recompiler state.
7208 * @param off The code buffer position.
7209 * @param fHstRegsNotToSave Set of registers not to save & restore.
7210 */
7211DECL_HIDDEN_THROW(uint32_t)
7212iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7213{
7214 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7215 if (fHstRegs)
7216 {
7217 do
7218 {
7219 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7220 fHstRegs &= ~RT_BIT_32(idxHstReg);
7221
7222 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7223 {
7224 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7225 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7226 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7227 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7228 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7229 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7230 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7231 {
7232 case kIemNativeVarKind_Stack:
7233 {
7234 /* Temporarily spill the variable register. */
7235 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7236 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7237 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7238 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7239 continue;
7240 }
7241
7242 case kIemNativeVarKind_Immediate:
7243 case kIemNativeVarKind_VarRef:
7244 case kIemNativeVarKind_GstRegRef:
7245 /* It is weird to have any of these loaded at this point. */
7246 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7247 continue;
7248
7249 case kIemNativeVarKind_End:
7250 case kIemNativeVarKind_Invalid:
7251 break;
7252 }
7253 AssertFailed();
7254 }
7255 else
7256 {
7257 /*
7258 * Allocate a temporary stack slot and spill the register to it.
7259 */
7260 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7261 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7262 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7263 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7264 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7265 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7266 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7267 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7268 }
7269 } while (fHstRegs);
7270 }
7271#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7272
7273 /*
7274 * Guest SIMD register shadows are flushed to CPUMCTX at the moment, so they don't need a stack slot
7275 * allocated, which would be more difficult anyway since they span multiple stack slots and come in
7276 * different sizes (besides, we only have a limited number of slots at the moment).
7277 *
7278 * However, the shadows need to be flushed out, as the guest SIMD register might get corrupted by
7279 * the callee. The flush asserts that the registers were written back earlier and are not in the dirty state.
7280 */
7281 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7282
7283 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7284 if (fHstRegs)
7285 {
7286 do
7287 {
7288 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7289 fHstRegs &= ~RT_BIT_32(idxHstReg);
7290
7291 /* Fixed reserved and temporary registers don't need saving. */
7292 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7293 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7294 continue;
7295
7296 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7297
7298 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7299 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7300 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7301 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7302 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7303 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7304 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7305 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7307 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7308 {
7309 case kIemNativeVarKind_Stack:
7310 {
7311 /* Temporarily spill the variable register. */
7312 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7313 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7314 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7315 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7316 if (cbVar == sizeof(RTUINT128U))
7317 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7318 else
7319 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7320 continue;
7321 }
7322
7323 case kIemNativeVarKind_Immediate:
7324 case kIemNativeVarKind_VarRef:
7325 case kIemNativeVarKind_GstRegRef:
7326 /* It is weird to have any of these loaded at this point. */
7327 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7328 continue;
7329
7330 case kIemNativeVarKind_End:
7331 case kIemNativeVarKind_Invalid:
7332 break;
7333 }
7334 AssertFailed();
7335 } while (fHstRegs);
7336 }
7337#endif
7338 return off;
7339}
7340
7341
7342/**
7343 * Emit code to restore volatile registers after a call to a helper.
7344 *
7345 * @returns New @a off value.
7346 * @param pReNative The recompiler state.
7347 * @param off The code buffer position.
7348 * @param fHstRegsNotToSave Set of registers not to save & restore.
7349 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7350 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7351 */
7352DECL_HIDDEN_THROW(uint32_t)
7353iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7354{
7355 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7356 if (fHstRegs)
7357 {
7358 do
7359 {
7360 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7361 fHstRegs &= ~RT_BIT_32(idxHstReg);
7362
7363 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7364 {
7365 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7366 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7367 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7368 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7369 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7370 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7371 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7372 {
7373 case kIemNativeVarKind_Stack:
7374 {
7375 /* Unspill the variable register. */
7376 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7377 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7378 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7379 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7380 continue;
7381 }
7382
7383 case kIemNativeVarKind_Immediate:
7384 case kIemNativeVarKind_VarRef:
7385 case kIemNativeVarKind_GstRegRef:
7386 /* It is weird to have any of these loaded at this point. */
7387 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7388 continue;
7389
7390 case kIemNativeVarKind_End:
7391 case kIemNativeVarKind_Invalid:
7392 break;
7393 }
7394 AssertFailed();
7395 }
7396 else
7397 {
7398 /*
7399 * Restore from temporary stack slot.
7400 */
7401 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7402 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7403 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7404 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7405
7406 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7407 }
7408 } while (fHstRegs);
7409 }
7410#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7411 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7412 if (fHstRegs)
7413 {
7414 do
7415 {
7416 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7417 fHstRegs &= ~RT_BIT_32(idxHstReg);
7418
7419 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7420 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7421 continue;
7422 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7423
7424 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7425 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7426 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7427 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7428 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7429 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7430 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7431 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7432 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7433 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7434 {
7435 case kIemNativeVarKind_Stack:
7436 {
7437 /* Unspill the variable register. */
7438 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7439 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7440 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7441 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7442
7443 if (cbVar == sizeof(RTUINT128U))
7444 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7445 else
7446 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7447 continue;
7448 }
7449
7450 case kIemNativeVarKind_Immediate:
7451 case kIemNativeVarKind_VarRef:
7452 case kIemNativeVarKind_GstRegRef:
7453 /* It is weird to have any of these loaded at this point. */
7454 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7455 continue;
7456
7457 case kIemNativeVarKind_End:
7458 case kIemNativeVarKind_Invalid:
7459 break;
7460 }
7461 AssertFailed();
7462 } while (fHstRegs);
7463 }
7464#endif
7465 return off;
7466}
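
/*
 * Bracketing sketch for a TLB-miss helper call (illustrative only): the two
 * functions above are meant to be used as a pair around the call, optionally
 * together with iemNativeRegRestoreGuestShadowsInVolatileRegs(), so that
 * variables living in volatile host registers survive the helper:
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      ... load helper arguments and emit the actual helper call here ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */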
7467
7468
7469/**
7470 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7471 *
7472 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7473 *
7474 * ASSUMES that @a idxVar is valid and unpacked.
7475 */
7476DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7477{
7478 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7479 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7480 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7481 {
7482 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7483 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7484 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7485 Assert(cSlots > 0);
7486 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7487 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7488 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7489 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7490 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7491 }
7492 else
7493 Assert(idxStackSlot == UINT8_MAX);
7494}
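
/*
 * Worked example of the slot math above (illustrative only): a 256-bit variable
 * (cbVar = 32) occupies cSlots = (32 + 7) / 8 = 4 stack slots, giving
 * fAllocMask = RT_BIT_32(4) - 1 = 0xf.  With idxStackSlot = 8 (hypothetical),
 * the function clears bits 8 through 11 in pReNative->Core.bmStack and resets
 * the variable's idxStackSlot to UINT8_MAX.
 */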
7495
7496
7497/**
7498 * Worker that frees a single variable.
7499 *
7500 * ASSUMES that @a idxVar is valid and unpacked.
7501 */
7502DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7503{
7504 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7505 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7506 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7507
7508 /* Free the host register first if any assigned. */
7509 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7510#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7511 if ( idxHstReg != UINT8_MAX
7512 && pReNative->Core.aVars[idxVar].fSimdReg)
7513 {
7514 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7515 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7516 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7517 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7518 }
7519 else
7520#endif
7521 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7522 {
7523 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7524 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7525 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7526 }
7527
7528 /* Free argument mapping. */
7529 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7530 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7531 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7532
7533 /* Free the stack slots. */
7534 iemNativeVarFreeStackSlots(pReNative, idxVar);
7535
7536 /* Free the actual variable. */
7537 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7538 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7539}
7540
7541
7542/**
7543 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7544 */
7545DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7546{
7547 while (bmVars != 0)
7548 {
7549 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7550 bmVars &= ~RT_BIT_32(idxVar);
7551
7552#if 1 /** @todo optimize by simplifying this later... */
7553 iemNativeVarFreeOneWorker(pReNative, idxVar);
7554#else
7555 /* Only need to free the host register, the rest is done as bulk updates below. */
7556 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7557 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7558 {
7559 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7560 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7561 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7562 }
7563#endif
7564 }
7565#if 0 /** @todo optimize by simplifying this later... */
7566 pReNative->Core.bmVars = 0;
7567 pReNative->Core.bmStack = 0;
7568 pReNative->Core.u64ArgVars = UINT64_MAX;
7569#endif
7570}
7571
7572
7573
7574/*********************************************************************************************************************************
7575* Emitters for IEM_MC_CALL_CIMPL_XXX *
7576*********************************************************************************************************************************/
7577
7578/**
7579 * Emits code to load a reference to the given guest register into @a idxGprDst.
7580 */
7581DECL_HIDDEN_THROW(uint32_t)
7582iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7583 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7584{
7585#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7586 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
7587#endif
7588
7589 /*
7590 * Get the offset relative to the CPUMCTX structure.
7591 */
7592 uint32_t offCpumCtx;
7593 switch (enmClass)
7594 {
7595 case kIemNativeGstRegRef_Gpr:
7596 Assert(idxRegInClass < 16);
7597 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7598 break;
7599
7600 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
7601 Assert(idxRegInClass < 4);
7602 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7603 break;
7604
7605 case kIemNativeGstRegRef_EFlags:
7606 Assert(idxRegInClass == 0);
7607 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7608 break;
7609
7610 case kIemNativeGstRegRef_MxCsr:
7611 Assert(idxRegInClass == 0);
7612 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7613 break;
7614
7615 case kIemNativeGstRegRef_FpuReg:
7616 Assert(idxRegInClass < 8);
7617 AssertFailed(); /** @todo what kind of indexing? */
7618 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7619 break;
7620
7621 case kIemNativeGstRegRef_MReg:
7622 Assert(idxRegInClass < 8);
7623 AssertFailed(); /** @todo what kind of indexing? */
7624 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7625 break;
7626
7627 case kIemNativeGstRegRef_XReg:
7628 Assert(idxRegInClass < 16);
7629 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7630 break;
7631
7632 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7633 Assert(idxRegInClass == 0);
7634 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7635 break;
7636
7637 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7638 Assert(idxRegInClass == 0);
7639 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7640 break;
7641
7642 default:
7643 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7644 }
7645
7646 /*
7647 * Load the address (the register reference) into the destination register.
7648 */
7649#ifdef RT_ARCH_AMD64
7650 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7651
7652#elif defined(RT_ARCH_ARM64)
7653 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7654 Assert(offCpumCtx < 4096);
7655 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7656
7657#else
7658# error "Port me!"
7659#endif
7660
7661 return off;
7662}
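
/*
 * Reference resolution sketch (illustrative only): for a high-byte GPR reference
 * such as CH (kIemNativeGstRegRef_GprHighByte, idxRegInClass = 1) the offset is
 *
 *      offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + 1 * sizeof(CPUMCTXGREG);
 *
 * and the emitted instruction is effectively an LEA of
 * pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx on AMD64, or an
 * immediate ADD to the fixed CPUMCTX pointer register on ARM64.
 */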
7663
7664
7665/**
7666 * Common code for CIMPL and AIMPL calls.
7667 *
7668 * These are calls that use argument variables and such. They should not be
7669 * confused with internal calls required to implement an MC operation,
7670 * like a TLB load and similar.
7671 *
7672 * Upon return all that is left to do is to load any hidden arguments and
7673 * perform the call. All argument variables are freed.
7674 *
7675 * @returns New code buffer offset; throws VBox status code on error.
7676 * @param pReNative The native recompile state.
7677 * @param off The code buffer offset.
7678 * @param cArgs The total number of arguments (including the hidden
7679 * ones).
7680 * @param cHiddenArgs The number of hidden arguments. The hidden
7681 * arguments must not have any variable declared for
7682 * them, whereas all the regular arguments must
7683 * (tstIEMCheckMc ensures this).
7684 * @param fFlushPendingWrites Whether to flush pending writes (default true).
7685 * Pending writes in call-volatile registers are still flushed when false.
7686 */
7687DECL_HIDDEN_THROW(uint32_t)
7688iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
7689 bool fFlushPendingWrites /*= true*/)
7690{
7691#ifdef VBOX_STRICT
7692 /*
7693 * Assert sanity.
7694 */
7695 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7696 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7697 for (unsigned i = 0; i < cHiddenArgs; i++)
7698 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7699 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7700 {
7701 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7702 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7703 }
7704 iemNativeRegAssertSanity(pReNative);
7705#endif
7706
7707 /* We don't know what the called function makes use of, so flush any pending register writes. */
7708 RT_NOREF(fFlushPendingWrites);
7709#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7710 if (fFlushPendingWrites)
7711#endif
7712 off = iemNativeRegFlushPendingWrites(pReNative, off);
7713
7714 /*
7715 * Before we do anything else, go over variables that are referenced and
7716 * make sure they are not in a register.
7717 */
7718 uint32_t bmVars = pReNative->Core.bmVars;
7719 if (bmVars)
7720 {
7721 do
7722 {
7723 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7724 bmVars &= ~RT_BIT_32(idxVar);
7725
7726 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7727 {
7728 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7729#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7730 if ( idxRegOld != UINT8_MAX
7731 && pReNative->Core.aVars[idxVar].fSimdReg)
7732 {
7733 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7734 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
7735
7736 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7737 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7738 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7739 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7740 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
7741 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7742 else
7743 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7744
7745 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
7746 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
7747
7748 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7749 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
7750 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7751 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
7752 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
7753 }
7754 else
7755#endif
7756 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7757 {
7758 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7759 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7760 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7761 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7762 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7763
7764 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7765 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7766 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7767 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7768 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7769 }
7770 }
7771 } while (bmVars != 0);
7772#if 0 //def VBOX_STRICT
7773 iemNativeRegAssertSanity(pReNative);
7774#endif
7775 }
7776
7777 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7778
7779#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7780 /*
7781 * As the very first step, make sure the host registers that will be used for arguments
7782 * don't shadow anything which needs writing back first.
7783 */
7784 for (uint32_t i = 0; i < cRegArgs; i++)
7785 {
7786 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7787
7788 /* Writeback any dirty guest shadows before using this register. */
7789 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
7790 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
7791 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
7792 }
7793#endif
7794
7795 /*
7796 * First, go over the host registers that will be used for arguments and make
7797 * sure they either hold the desired argument or are free.
7798 */
7799 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7800 {
7801 for (uint32_t i = 0; i < cRegArgs; i++)
7802 {
7803 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7804 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7805 {
7806 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7807 {
7808 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7809 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7810 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7811 Assert(pVar->idxReg == idxArgReg);
7812 uint8_t const uArgNo = pVar->uArgNo;
7813 if (uArgNo == i)
7814 { /* perfect */ }
7815 /* The variable allocator logic should make sure this is impossible,
7816 except for when the return register is used as a parameter (ARM,
7817 but not x86). */
7818#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7819 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7820 {
7821# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7822# error "Implement this"
7823# endif
7824 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7825 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7826 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7827 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7828 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7829 }
7830#endif
7831 else
7832 {
7833 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7834
7835 if (pVar->enmKind == kIemNativeVarKind_Stack)
7836 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7837 else
7838 {
7839 /* just free it, can be reloaded if used again */
7840 pVar->idxReg = UINT8_MAX;
7841 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7842 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7843 }
7844 }
7845 }
7846 else
7847 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7848 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7849 }
7850 }
7851#if 0 //def VBOX_STRICT
7852 iemNativeRegAssertSanity(pReNative);
7853#endif
7854 }
7855
7856 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7857
7858#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7859 /*
7860 * If there are any stack arguments, make sure they are in their place as well.
7861 *
7862 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7863 * the caller) will be loading it later and it must be free (see the first loop).
7864 */
7865 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7866 {
7867 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7868 {
7869 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7870 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7871 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7872 {
7873 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7874 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7875 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7876 pVar->idxReg = UINT8_MAX;
7877 }
7878 else
7879 {
7880 /* Use ARG0 as temp for stuff we need registers for. */
7881 switch (pVar->enmKind)
7882 {
7883 case kIemNativeVarKind_Stack:
7884 {
7885 uint8_t const idxStackSlot = pVar->idxStackSlot;
7886 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7887 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7888 iemNativeStackCalcBpDisp(idxStackSlot));
7889 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7890 continue;
7891 }
7892
7893 case kIemNativeVarKind_Immediate:
7894 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7895 continue;
7896
7897 case kIemNativeVarKind_VarRef:
7898 {
7899 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7900 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7901 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7902 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7903 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7904# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7905 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
7906 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
7907 if ( fSimdReg
7908 && idxRegOther != UINT8_MAX)
7909 {
7910 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7911 if (cbVar == sizeof(RTUINT128U))
7912 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
7913 else
7914 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
7915 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7916 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7917 }
7918 else
7919# endif
7920 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7921 {
7922 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7923 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7924 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7925 }
7926 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7927 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7928 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7929 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7930 continue;
7931 }
7932
7933 case kIemNativeVarKind_GstRegRef:
7934 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7935 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
7936 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7937 continue;
7938
7939 case kIemNativeVarKind_Invalid:
7940 case kIemNativeVarKind_End:
7941 break;
7942 }
7943 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7944 }
7945 }
7946# if 0 //def VBOX_STRICT
7947 iemNativeRegAssertSanity(pReNative);
7948# endif
7949 }
7950#else
7951 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7952#endif
7953
7954 /*
7955 * Make sure the argument variables are loaded into their respective registers.
7956 *
7957 * We can optimize this by ASSUMING that any register allocations are for
7958 * registers that have already been loaded and are ready. The previous step
7959 * saw to that.
7960 */
7961 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7962 {
7963 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7964 {
7965 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7966 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7967 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
7968 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
7969 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
7970 else
7971 {
7972 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7973 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7974 {
7975 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7976 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
7977 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
7978 | RT_BIT_32(idxArgReg);
7979 pVar->idxReg = idxArgReg;
7980 }
7981 else
7982 {
7983 /* Use ARG0 as temp for stuff we need registers for. */
7984 switch (pVar->enmKind)
7985 {
7986 case kIemNativeVarKind_Stack:
7987 {
7988 uint8_t const idxStackSlot = pVar->idxStackSlot;
7989 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7990 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7991 continue;
7992 }
7993
7994 case kIemNativeVarKind_Immediate:
7995 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
7996 continue;
7997
7998 case kIemNativeVarKind_VarRef:
7999 {
8000 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8001 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8002 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8003 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8004 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8005 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8006#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8007 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8008 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8009 if ( fSimdReg
8010 && idxRegOther != UINT8_MAX)
8011 {
8012 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8013 if (cbVar == sizeof(RTUINT128U))
8014 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8015 else
8016 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8017 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8018 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8019 }
8020 else
8021#endif
8022 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8023 {
8024 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8025 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8026 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8027 }
8028 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8029 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8030 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8031 continue;
8032 }
8033
8034 case kIemNativeVarKind_GstRegRef:
8035 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8036 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8037 continue;
8038
8039 case kIemNativeVarKind_Invalid:
8040 case kIemNativeVarKind_End:
8041 break;
8042 }
8043 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8044 }
8045 }
8046 }
8047#if 0 //def VBOX_STRICT
8048 iemNativeRegAssertSanity(pReNative);
8049#endif
8050 }
8051#ifdef VBOX_STRICT
8052 else
8053 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8054 {
8055 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8056 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8057 }
8058#endif
8059
8060 /*
8061 * Free all argument variables (simplified).
8062 * Their lifetime always expires with the call they are for.
8063 */
8064 /** @todo Make the python script check that arguments aren't used after
8065 * IEM_MC_CALL_XXXX. */
8066 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8067 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8068 * an argument value. There is also some FPU stuff. */
8069 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8070 {
8071 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8072 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8073
8074 /* no need to free registers: */
8075 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8076 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8077 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8078 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8079 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8080 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8081
8082 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8083 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8084 iemNativeVarFreeStackSlots(pReNative, idxVar);
8085 }
8086 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8087
8088 /*
8089 * Flush volatile registers as we make the call.
8090 */
8091 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8092
8093 return off;
8094}
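
/*
 * Caller-side sketch (illustrative only, using assumed helpers from the
 * recompiler headers): after iemNativeEmitCallCommon() has placed all visible
 * arguments, a CIMPL/AIMPL emitter typically loads the hidden argument(s) and
 * performs the call, roughly like:
 *
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
 *
 * IEM_CIMPL_HIDDEN_ARGS, IEMNATIVE_REG_FIXED_PVMCPU and iemNativeEmitCallImm()
 * are assumptions here; they live in the recompiler headers, not in this file.
 */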
8095
8096
8097
8098/*********************************************************************************************************************************
8099* TLB Lookup. *
8100*********************************************************************************************************************************/
8101
8102/**
8103 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8104 */
8105DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8106{
8107 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8108 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8109 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8110 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8111
8112 /* Do the lookup manually. */
8113 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8114 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8115 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8116 if (RT_LIKELY(pTlbe->uTag == uTag))
8117 {
8118 /*
8119 * Check TLB page table level access flags.
8120 */
8121 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8122 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8123 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8124 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8125 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8126 | IEMTLBE_F_PG_UNASSIGNED
8127 | IEMTLBE_F_PT_NO_ACCESSED
8128 | fNoWriteNoDirty | fNoUser);
8129 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8130 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8131 {
8132 /*
8133 * Check the resulting address against the lookup result.
8134 */
8135 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8136 if ((uintptr_t)pbAddr == uResult)
8137 return;
8138 RT_NOREF(cbMem);
8139 AssertFailed();
8140 }
8141 else
8142 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8143 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8144 }
8145 else
8146 AssertFailed();
8147 RT_BREAKPOINT();
8148}
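
/*
 * Packing sketch (illustrative only): the uSegAndSizeAndAccess parameter checked
 * above is the inverse of the RT_BYTE1/RT_BYTE2/shift unpacking, i.e. the
 * emitting side packs it as
 *
 *      uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg
 *                                          | ((uint32_t)cbMem << 8)
 *                                          | (fAccess << 16);
 *
 * with iSegReg = UINT8_MAX for flat accesses (no segment base applied).
 */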
8149
8150/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8151
8152
8153
8154/*********************************************************************************************************************************
8155* Recompiler Core. *
8156*********************************************************************************************************************************/
8157
8158/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8159static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8160{
8161 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8162 pDis->cbCachedInstr += cbMaxRead;
8163 RT_NOREF(cbMinRead);
8164 return VERR_NO_DATA;
8165}
8166
8167
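/**
 * Translates a byte offset into VMCPUCC to the name of the member at that
 * offset, used for annotating VCPU-relative accesses in the disassembly.
 *
 * @returns Member name, or NULL if the offset isn't a known member.
 * @param   off     The offset into VMCPUCC.
 */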
8168DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8169{
8170 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8171 {
8172#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8173 ENTRY(fLocalForcedActions),
8174 ENTRY(iem.s.rcPassUp),
8175 ENTRY(iem.s.fExec),
8176 ENTRY(iem.s.pbInstrBuf),
8177 ENTRY(iem.s.uInstrBufPc),
8178 ENTRY(iem.s.GCPhysInstrBuf),
8179 ENTRY(iem.s.cbInstrBufTotal),
8180 ENTRY(iem.s.idxTbCurInstr),
8181#ifdef VBOX_WITH_STATISTICS
8182 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8183 ENTRY(iem.s.StatNativeTlbHitsForStore),
8184 ENTRY(iem.s.StatNativeTlbHitsForStack),
8185 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8186 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8187 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8188 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8189 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8190#endif
8191 ENTRY(iem.s.DataTlb.aEntries),
8192 ENTRY(iem.s.DataTlb.uTlbRevision),
8193 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8194 ENTRY(iem.s.DataTlb.cTlbHits),
8195 ENTRY(iem.s.CodeTlb.aEntries),
8196 ENTRY(iem.s.CodeTlb.uTlbRevision),
8197 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8198 ENTRY(iem.s.CodeTlb.cTlbHits),
8199 ENTRY(pVMR3),
8200 ENTRY(cpum.GstCtx.rax),
8201 ENTRY(cpum.GstCtx.ah),
8202 ENTRY(cpum.GstCtx.rcx),
8203 ENTRY(cpum.GstCtx.ch),
8204 ENTRY(cpum.GstCtx.rdx),
8205 ENTRY(cpum.GstCtx.dh),
8206 ENTRY(cpum.GstCtx.rbx),
8207 ENTRY(cpum.GstCtx.bh),
8208 ENTRY(cpum.GstCtx.rsp),
8209 ENTRY(cpum.GstCtx.rbp),
8210 ENTRY(cpum.GstCtx.rsi),
8211 ENTRY(cpum.GstCtx.rdi),
8212 ENTRY(cpum.GstCtx.r8),
8213 ENTRY(cpum.GstCtx.r9),
8214 ENTRY(cpum.GstCtx.r10),
8215 ENTRY(cpum.GstCtx.r11),
8216 ENTRY(cpum.GstCtx.r12),
8217 ENTRY(cpum.GstCtx.r13),
8218 ENTRY(cpum.GstCtx.r14),
8219 ENTRY(cpum.GstCtx.r15),
8220 ENTRY(cpum.GstCtx.es.Sel),
8221 ENTRY(cpum.GstCtx.es.u64Base),
8222 ENTRY(cpum.GstCtx.es.u32Limit),
8223 ENTRY(cpum.GstCtx.es.Attr),
8224 ENTRY(cpum.GstCtx.cs.Sel),
8225 ENTRY(cpum.GstCtx.cs.u64Base),
8226 ENTRY(cpum.GstCtx.cs.u32Limit),
8227 ENTRY(cpum.GstCtx.cs.Attr),
8228 ENTRY(cpum.GstCtx.ss.Sel),
8229 ENTRY(cpum.GstCtx.ss.u64Base),
8230 ENTRY(cpum.GstCtx.ss.u32Limit),
8231 ENTRY(cpum.GstCtx.ss.Attr),
8232 ENTRY(cpum.GstCtx.ds.Sel),
8233 ENTRY(cpum.GstCtx.ds.u64Base),
8234 ENTRY(cpum.GstCtx.ds.u32Limit),
8235 ENTRY(cpum.GstCtx.ds.Attr),
8236 ENTRY(cpum.GstCtx.fs.Sel),
8237 ENTRY(cpum.GstCtx.fs.u64Base),
8238 ENTRY(cpum.GstCtx.fs.u32Limit),
8239 ENTRY(cpum.GstCtx.fs.Attr),
8240 ENTRY(cpum.GstCtx.gs.Sel),
8241 ENTRY(cpum.GstCtx.gs.u64Base),
8242 ENTRY(cpum.GstCtx.gs.u32Limit),
8243 ENTRY(cpum.GstCtx.gs.Attr),
8244 ENTRY(cpum.GstCtx.rip),
8245 ENTRY(cpum.GstCtx.eflags),
8246 ENTRY(cpum.GstCtx.uRipInhibitInt),
8247 ENTRY(cpum.GstCtx.cr0),
8248 ENTRY(cpum.GstCtx.cr4),
8249 ENTRY(cpum.GstCtx.aXcr[0]),
8250 ENTRY(cpum.GstCtx.aXcr[1]),
8251#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8252 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8253 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8254 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8255 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8256 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8257 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8258 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8259 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8260 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8261 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8262 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8263 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8264 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8265 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8266 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8267 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8268 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8269 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8270 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8271 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8272 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8273 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8274 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8275 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8276 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8277 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8278 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8279 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8280 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8281 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8282 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8283 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8284#endif
8285#undef ENTRY
8286 };
8287#ifdef VBOX_STRICT
8288 static bool s_fOrderChecked = false;
8289 if (!s_fOrderChecked)
8290 {
8291 s_fOrderChecked = true;
8292 uint32_t offPrev = s_aMembers[0].off;
8293 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8294 {
8295 Assert(s_aMembers[i].off > offPrev);
8296 offPrev = s_aMembers[i].off;
8297 }
8298 }
8299#endif
8300
8301 /*
8302 * Binary lookup.
8303 */
8304 unsigned iStart = 0;
8305 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8306 for (;;)
8307 {
8308 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8309 uint32_t const offCur = s_aMembers[iCur].off;
8310 if (off < offCur)
8311 {
8312 if (iCur != iStart)
8313 iEnd = iCur;
8314 else
8315 break;
8316 }
8317 else if (off > offCur)
8318 {
8319 if (iCur + 1 < iEnd)
8320 iStart = iCur + 1;
8321 else
8322 break;
8323 }
8324 else
8325 return s_aMembers[iCur].pszName;
8326 }
8327#ifdef VBOX_WITH_STATISTICS
8328 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8329 return "iem.s.acThreadedFuncStats[iFn]";
8330#endif
8331 return NULL;
8332}
8333
8334
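/**
 * Disassembles the native code of a translation block to the given output
 * helper, interleaving guest instructions, threaded calls, labels and register
 * shadowing notes when TB debug info is available.
 *
 * @param   pTb     The translation block; must be of the native type.
 * @param   pHlp    The output helper to print with.
 */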
8335DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8336{
8337 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8338#if defined(RT_ARCH_AMD64)
8339 static const char * const a_apszMarkers[] =
8340 {
8341 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8342 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8343 };
8344#endif
8345
8346 char szDisBuf[512];
8347 DISSTATE Dis;
8348 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8349 uint32_t const cNative = pTb->Native.cInstructions;
8350 uint32_t offNative = 0;
8351#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8352 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8353#endif
8354 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8355 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8356 : DISCPUMODE_64BIT;
8357#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8358 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8359#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8360 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8361#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8362# error "Port me"
8363#else
8364 csh hDisasm = ~(size_t)0;
8365# if defined(RT_ARCH_AMD64)
8366 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8367# elif defined(RT_ARCH_ARM64)
8368 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8369# else
8370# error "Port me"
8371# endif
8372 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8373
8374 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8375 //Assert(rcCs == CS_ERR_OK);
8376#endif
8377
8378 /*
8379 * Print TB info.
8380 */
8381 pHlp->pfnPrintf(pHlp,
8382 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8383 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8384 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8385 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8386#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8387 if (pDbgInfo && pDbgInfo->cEntries > 1)
8388 {
8389 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8390
8391 /*
8392 * This disassembly is driven by the debug info, which follows the native
8393 * code and indicates where the next guest instruction starts, where the
8394 * labels are, and similar things.
8395 */
8396 uint32_t idxThreadedCall = 0;
8397 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8398 uint8_t idxRange = UINT8_MAX;
8399 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8400 uint32_t offRange = 0;
8401 uint32_t offOpcodes = 0;
8402 uint32_t const cbOpcodes = pTb->cbOpcodes;
8403 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8404 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8405 uint32_t iDbgEntry = 1;
8406 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8407
8408 while (offNative < cNative)
8409 {
8410 /* If we're at or have passed the point where the next chunk of debug
8411 info starts, process it. */
8412 if (offDbgNativeNext <= offNative)
8413 {
8414 offDbgNativeNext = UINT32_MAX;
8415 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8416 {
8417 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8418 {
8419 case kIemTbDbgEntryType_GuestInstruction:
8420 {
8421 /* Did the exec flag change? */
8422 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8423 {
8424 pHlp->pfnPrintf(pHlp,
8425 " fExec change %#08x -> %#08x %s\n",
8426 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8427 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8428 szDisBuf, sizeof(szDisBuf)));
8429 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8430 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8431 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8432 : DISCPUMODE_64BIT;
8433 }
8434
8435 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8436 where the compilation was aborted before the opcode was recorded and the actual
8437 instruction was translated to a threaded call. This may happen when we run out
8438 of ranges, or when some complicated interrupts/FFs are found to be pending or
8439 similar. So, we just deal with it here rather than in the compiler code as it
8440 is a lot simpler to do here. */
8441 if ( idxRange == UINT8_MAX
8442 || idxRange >= cRanges
8443 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8444 {
8445 idxRange += 1;
8446 if (idxRange < cRanges)
8447 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8448 else
8449 continue;
8450 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8451 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8452 + (pTb->aRanges[idxRange].idxPhysPage == 0
8453 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8454 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8455 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8456 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8457 pTb->aRanges[idxRange].idxPhysPage);
8458 GCPhysPc += offRange;
8459 }
8460
8461 /* Disassemble the instruction. */
8462 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8463 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8464 uint32_t cbInstr = 1;
8465 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8466 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8467 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8468 if (RT_SUCCESS(rc))
8469 {
8470 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8471 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8472 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8473 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8474
8475 static unsigned const s_offMarker = 55;
8476 static char const s_szMarker[] = " ; <--- guest";
8477 if (cch < s_offMarker)
8478 {
8479 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8480 cch = s_offMarker;
8481 }
8482 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8483 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8484
8485 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8486 }
8487 else
8488 {
8489 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8490 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8491 cbInstr = 1;
8492 }
8493 GCPhysPc += cbInstr;
8494 offOpcodes += cbInstr;
8495 offRange += cbInstr;
8496 continue;
8497 }
8498
8499 case kIemTbDbgEntryType_ThreadedCall:
8500 pHlp->pfnPrintf(pHlp,
8501 " Call #%u to %s (%u args) - %s\n",
8502 idxThreadedCall,
8503 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8504 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8505 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8506 idxThreadedCall++;
8507 continue;
8508
8509 case kIemTbDbgEntryType_GuestRegShadowing:
8510 {
8511 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8512 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8513 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8514 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8515 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8516 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8517 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8518 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8519 else
8520 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8521 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8522 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8523 continue;
8524 }
8525
8526#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8527 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8528 {
8529 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8530 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8531 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8532 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8533 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8534 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8535 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8536 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8537 else
8538 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8539 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8540 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8541 continue;
8542 }
8543#endif
8544
8545 case kIemTbDbgEntryType_Label:
8546 {
8547 const char *pszName = "what_the_fudge";
8548 const char *pszComment = "";
8549 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8550 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8551 {
8552 case kIemNativeLabelType_Return: pszName = "Return"; break;
8553 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8554 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8555 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8556 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8557 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8558 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8559 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8560 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8561 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8562 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8563 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8564 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8565 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8566 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8567 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8568 case kIemNativeLabelType_If:
8569 pszName = "If";
8570 fNumbered = true;
8571 break;
8572 case kIemNativeLabelType_Else:
8573 pszName = "Else";
8574 fNumbered = true;
8575 pszComment = " ; regs state restored pre-if-block";
8576 break;
8577 case kIemNativeLabelType_Endif:
8578 pszName = "Endif";
8579 fNumbered = true;
8580 break;
8581 case kIemNativeLabelType_CheckIrq:
8582 pszName = "CheckIrq_CheckVM";
8583 fNumbered = true;
8584 break;
8585 case kIemNativeLabelType_TlbLookup:
8586 pszName = "TlbLookup";
8587 fNumbered = true;
8588 break;
8589 case kIemNativeLabelType_TlbMiss:
8590 pszName = "TlbMiss";
8591 fNumbered = true;
8592 break;
8593 case kIemNativeLabelType_TlbDone:
8594 pszName = "TlbDone";
8595 fNumbered = true;
8596 break;
8597 case kIemNativeLabelType_Invalid:
8598 case kIemNativeLabelType_End:
8599 break;
8600 }
8601 if (fNumbered)
8602 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8603 else
8604 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8605 continue;
8606 }
8607
8608 case kIemTbDbgEntryType_NativeOffset:
8609 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8610 Assert(offDbgNativeNext >= offNative);
8611 break;
8612
8613#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8614 case kIemTbDbgEntryType_DelayedPcUpdate:
8615 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8616 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8617 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8618 continue;
8619#endif
8620
8621#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8622 case kIemTbDbgEntryType_GuestRegDirty:
8623 {
8624 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8625 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
8626 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
8627 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
8628 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
8629 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
8630 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
8631 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
8632 pszGstReg, pszHstReg);
8633 continue;
8634 }
8635
8636 case kIemTbDbgEntryType_GuestRegWriteback:
8637 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
8638 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
8639 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);
8640 continue;
8641#endif
8642
8643 default:
8644 AssertFailed();
8645 }
8646 iDbgEntry++;
8647 break;
8648 }
8649 }
8650
8651 /*
8652 * Disassemble the next native instruction.
8653 */
8654 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8655# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8656 uint32_t cbInstr = sizeof(paNative[0]);
8657 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8658 if (RT_SUCCESS(rc))
8659 {
8660# if defined(RT_ARCH_AMD64)
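 /* Markers emitted by iemNativeEmitMarker show up as 7-byte NOPs carrying a
    32-bit info value in instruction bytes 3..6: low 15 bits = call index,
    bit 15 = recompiled, high word = threaded function number; other values
    identify the fixed markers in a_apszMarkers. */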
8661 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8662 {
8663 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8664 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8665 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8666 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8667 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8668 uInfo & 0x8000 ? "recompiled" : "todo");
8669 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8670 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8671 else
8672 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8673 }
8674 else
8675# endif
8676 {
8677 const char *pszAnnotation = NULL;
8678# ifdef RT_ARCH_AMD64
8679 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8680 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8681 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8682 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8683 PCDISOPPARAM pMemOp;
8684 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8685 pMemOp = &Dis.Param1;
8686 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8687 pMemOp = &Dis.Param2;
8688 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8689 pMemOp = &Dis.Param3;
8690 else
8691 pMemOp = NULL;
8692 if ( pMemOp
8693 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8694 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8695 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8696 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8697
8698#elif defined(RT_ARCH_ARM64)
8699 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8700 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8701 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8702# else
8703# error "Port me"
8704# endif
8705 if (pszAnnotation)
8706 {
8707 static unsigned const s_offAnnotation = 55;
8708 size_t const cchAnnotation = strlen(pszAnnotation);
8709 size_t cchDis = strlen(szDisBuf);
8710 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8711 {
8712 if (cchDis < s_offAnnotation)
8713 {
8714 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8715 cchDis = s_offAnnotation;
8716 }
8717 szDisBuf[cchDis++] = ' ';
8718 szDisBuf[cchDis++] = ';';
8719 szDisBuf[cchDis++] = ' ';
8720 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8721 }
8722 }
8723 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8724 }
8725 }
8726 else
8727 {
8728# if defined(RT_ARCH_AMD64)
8729 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8730 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8731# elif defined(RT_ARCH_ARM64)
8732 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8733# else
8734# error "Port me"
8735# endif
8736 cbInstr = sizeof(paNative[0]);
8737 }
8738 offNative += cbInstr / sizeof(paNative[0]);
8739
8740# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8741 cs_insn *pInstr;
8742 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8743 (uintptr_t)pNativeCur, 1, &pInstr);
8744 if (cInstrs > 0)
8745 {
8746 Assert(cInstrs == 1);
8747 const char *pszAnnotation = NULL;
8748# if defined(RT_ARCH_ARM64)
8749 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8750 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8751 {
8752 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8753 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8754 char *psz = strchr(pInstr->op_str, '[');
8755 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8756 {
8757 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8758 int32_t off = -1;
8759 psz += 4;
8760 if (*psz == ']')
8761 off = 0;
8762 else if (*psz == ',')
8763 {
8764 psz = RTStrStripL(psz + 1);
8765 if (*psz == '#')
8766 off = RTStrToInt32(&psz[1]);
8767 /** @todo deal with index registers and LSL as well... */
8768 }
8769 if (off >= 0)
8770 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8771 }
8772 }
8773# endif
8774
8775 size_t const cchOp = strlen(pInstr->op_str);
8776# if defined(RT_ARCH_AMD64)
8777 if (pszAnnotation)
8778 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8779 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8780 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8781 else
8782 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8783 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8784
8785# else
8786 if (pszAnnotation)
8787 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8788 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8789 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8790 else
8791 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8792 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8793# endif
8794 offNative += pInstr->size / sizeof(*pNativeCur);
8795 cs_free(pInstr, cInstrs);
8796 }
8797 else
8798 {
8799# if defined(RT_ARCH_AMD64)
8800 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8801 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8802# else
8803 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8804# endif
8805 offNative++;
8806 }
8807# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8808 }
8809 }
8810 else
8811#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8812 {
8813 /*
8814 * No debug info, just disassemble the x86 code and then the native code.
8815 *
8816 * First the guest code:
8817 */
8818 for (unsigned i = 0; i < pTb->cRanges; i++)
8819 {
8820 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8821 + (pTb->aRanges[i].idxPhysPage == 0
8822 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8823 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8824 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8825 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8826 unsigned off = pTb->aRanges[i].offOpcodes;
8827 /** @todo this ain't working when crossing pages! */
8828 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8829 while (off < cbOpcodes)
8830 {
8831 uint32_t cbInstr = 1;
8832 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8833 &pTb->pabOpcodes[off], cbOpcodes - off,
8834 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8835 if (RT_SUCCESS(rc))
8836 {
8837 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8838 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8839 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8840 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8841 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8842 GCPhysPc += cbInstr;
8843 off += cbInstr;
8844 }
8845 else
8846 {
8847 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8848 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8849 break;
8850 }
8851 }
8852 }
8853
8854 /*
8855 * Then the native code:
8856 */
8857 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8858 while (offNative < cNative)
8859 {
8860 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8861# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8862 uint32_t cbInstr = sizeof(paNative[0]);
8863 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8864 if (RT_SUCCESS(rc))
8865 {
8866# if defined(RT_ARCH_AMD64)
8867 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8868 {
8869 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8870 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8871 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8872 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8873 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8874 uInfo & 0x8000 ? "recompiled" : "todo");
8875 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8876 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8877 else
8878 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8879 }
8880 else
8881# endif
8882 {
8883# ifdef RT_ARCH_AMD64
8884 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8885 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8886 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8887 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8888# elif defined(RT_ARCH_ARM64)
8889 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8890 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8891 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8892# else
8893# error "Port me"
8894# endif
8895 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8896 }
8897 }
8898 else
8899 {
8900# if defined(RT_ARCH_AMD64)
8901 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8902 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8903# else
8904 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8905# endif
8906 cbInstr = sizeof(paNative[0]);
8907 }
8908 offNative += cbInstr / sizeof(paNative[0]);
8909
8910# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8911 cs_insn *pInstr;
8912 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8913 (uintptr_t)pNativeCur, 1, &pInstr);
8914 if (cInstrs > 0)
8915 {
8916 Assert(cInstrs == 1);
8917# if defined(RT_ARCH_AMD64)
8918 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8919 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8920# else
8921 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8922 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8923# endif
8924 offNative += pInstr->size / sizeof(*pNativeCur);
8925 cs_free(pInstr, cInstrs);
8926 }
8927 else
8928 {
8929# if defined(RT_ARCH_AMD64)
8930 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8931 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8932# else
8933 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8934# endif
8935 offNative++;
8936 }
8937# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8938 }
8939 }
8940
8941#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8942 /* Cleanup. */
8943 cs_close(&hDisasm);
8944#endif
8945}
8946
8947
8948/**
8949 * Recompiles the given threaded TB into a native one.
8950 *
8951 * In case of failure the translation block will be returned as-is.
8952 *
8953 * @returns pTb.
8954 * @param pVCpu The cross context virtual CPU structure of the calling
8955 * thread.
8956 * @param pTb The threaded translation to recompile to native.
8957 */
8958DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8959{
8960 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
8961
8962 /*
8963 * The first time thru, we allocate the recompiler state; the other times
8964 * we just need to reset it before using it again.
8965 */
8966 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
8967 if (RT_LIKELY(pReNative))
8968 iemNativeReInit(pReNative, pTb);
8969 else
8970 {
8971 pReNative = iemNativeInit(pVCpu, pTb);
8972 AssertReturn(pReNative, pTb);
8973 }
8974
8975#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8976 /*
8977 * First do liveness analysis. This is done backwards.
8978 */
8979 {
8980 uint32_t idxCall = pTb->Thrd.cCalls;
8981 if (idxCall <= pReNative->cLivenessEntriesAlloc)
8982 { /* likely */ }
8983 else
8984 {
8985 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
8986 while (idxCall > cAlloc)
8987 cAlloc *= 2;
8988 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
8989 AssertReturn(pvNew, pTb);
8990 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
8991 pReNative->cLivenessEntriesAlloc = cAlloc;
8992 }
8993 AssertReturn(idxCall > 0, pTb);
8994 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
8995
8996 /* The initial entry of the backward pass is the final (last) one, initialized as all-unused. */
8997 idxCall--;
8998 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
8999
9000 /* Loop backwards thru the calls and fill in the other entries. */
9001 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9002 while (idxCall > 0)
9003 {
9004 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9005 if (pfnLiveness)
9006 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9007 else
9008 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9009 pCallEntry--;
9010 idxCall--;
9011 }
9012
9013# ifdef VBOX_WITH_STATISTICS
9014 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9015 to 'clobbered' rather than 'input'. */
9016 /** @todo */
9017# endif
9018 }
9019#endif
9020
9021 /*
9022 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9023 * for aborting if an error happens.
9024 */
9025 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9026#ifdef LOG_ENABLED
9027 uint32_t const cCallsOrg = cCallsLeft;
9028#endif
9029 uint32_t off = 0;
9030 int rc = VINF_SUCCESS;
9031 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9032 {
9033 /*
9034 * Emit prolog code (fixed).
9035 */
9036 off = iemNativeEmitProlog(pReNative, off);
9037
9038 /*
9039 * Convert the calls to native code.
9040 */
9041#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9042 int32_t iGstInstr = -1;
9043#endif
9044#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9045 uint32_t cThreadedCalls = 0;
9046 uint32_t cRecompiledCalls = 0;
9047#endif
9048#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9049 uint32_t idxCurCall = 0;
9050#endif
9051 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9052 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9053 while (cCallsLeft-- > 0)
9054 {
9055 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9056#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9057 pReNative->idxCurCall = idxCurCall;
9058#endif
9059
9060 /*
9061 * Debug info, assembly markup and statistics.
9062 */
9063#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9064 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9065 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9066#endif
9067#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9068 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9069 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9070 {
9071 if (iGstInstr < (int32_t)pTb->cInstructions)
9072 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9073 else
9074 Assert(iGstInstr == pTb->cInstructions);
9075 iGstInstr = pCallEntry->idxInstr;
9076 }
9077 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9078#endif
9079#if defined(VBOX_STRICT)
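 /* Emit a NOP marker with the call index (bit 15 set when recompiled) in the low
    word and the threaded function number in the high word; iemNativeDisassembleTb
    decodes these again when dumping the native code. */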
9080 off = iemNativeEmitMarker(pReNative, off,
9081 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9082#endif
9083#if defined(VBOX_STRICT)
9084 iemNativeRegAssertSanity(pReNative);
9085#endif
9086#ifdef VBOX_WITH_STATISTICS
9087 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9088#endif
9089
9090 /*
9091 * Actual work.
9092 */
9093 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9094 pfnRecom ? "(recompiled)" : "(todo)"));
9095 if (pfnRecom) /** @todo stats on this. */
9096 {
9097 off = pfnRecom(pReNative, off, pCallEntry);
9098 STAM_REL_STATS({cRecompiledCalls++;});
9099 }
9100 else
9101 {
9102 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9103 STAM_REL_STATS({cThreadedCalls++;});
9104 }
9105 Assert(off <= pReNative->cInstrBufAlloc);
9106 Assert(pReNative->cCondDepth == 0);
9107
9108#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9109 if (LogIs2Enabled())
9110 {
9111 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9112# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9113 static const char s_achState[] = "CUXI";
9114# else
9115 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9116# endif
9117
9118 char szGpr[17];
9119 for (unsigned i = 0; i < 16; i++)
9120 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9121 szGpr[16] = '\0';
9122
9123 char szSegBase[X86_SREG_COUNT + 1];
9124 char szSegLimit[X86_SREG_COUNT + 1];
9125 char szSegAttrib[X86_SREG_COUNT + 1];
9126 char szSegSel[X86_SREG_COUNT + 1];
9127 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9128 {
9129 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9130 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9131 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9132 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9133 }
9134 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9135 = szSegSel[X86_SREG_COUNT] = '\0';
9136
9137 char szEFlags[8];
9138 for (unsigned i = 0; i < 7; i++)
9139 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9140 szEFlags[7] = '\0';
9141
9142 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9143 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9144 }
9145#endif
9146
9147 /*
9148 * Advance.
9149 */
9150 pCallEntry++;
9151#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9152 idxCurCall++;
9153#endif
9154 }
9155
9156 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9157 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9158 if (!cThreadedCalls)
9159 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9160
9161 /*
9162 * Emit the epilog code.
9163 */
9164 uint32_t idxReturnLabel;
9165 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9166
9167 /*
9168 * Generate special jump labels.
9169 */
9170 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9171 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9172 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9173 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9174
9175 /*
9176 * Generate simple TB tail labels that just call a helper with a pVCpu
9177 * arg and either return or longjmp/throw a non-zero status.
9178 *
9179 * The array entries must be ordered by enmLabel value so we can index
9180 * using fTailLabels bit numbers.
9181 */
9182 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9183 static struct
9184 {
9185 IEMNATIVELABELTYPE enmLabel;
9186 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9187 } const g_aSimpleTailLabels[] =
9188 {
9189 { kIemNativeLabelType_Invalid, NULL },
9190 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9191 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9192 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9193 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9194 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9195 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9196 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9197 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9198 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9199 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9200 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9201 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9202 };
9203 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9204 AssertCompile(kIemNativeLabelType_Invalid == 0);
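 /* The mask RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U covers label bits 1
    thru LastSimple, deliberately excluding bit 0 (kIemNativeLabelType_Invalid). */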
9205 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9206 if (fTailLabels)
9207 {
9208 do
9209 {
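 /* ASMBitFirstSetU64 returns a 1-based bit index, hence the minus one. */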
9210 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9211 fTailLabels &= ~RT_BIT_64(enmLabel);
9212 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9213
9214 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9215 Assert(idxLabel != UINT32_MAX);
9216 if (idxLabel != UINT32_MAX)
9217 {
9218 iemNativeLabelDefine(pReNative, idxLabel, off);
9219
9220 /* int pfnCallback(PVMCPUCC pVCpu) */
9221 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9222 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9223
9224 /* jump back to the return sequence. */
9225 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9226 }
9227
9228 } while (fTailLabels);
9229 }
9230 }
9231 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9232 {
9233 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9234 return pTb;
9235 }
9236 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9237 Assert(off <= pReNative->cInstrBufAlloc);
9238
9239 /*
9240 * Make sure all labels have been defined.
9241 */
9242 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9243#ifdef VBOX_STRICT
9244 uint32_t const cLabels = pReNative->cLabels;
9245 for (uint32_t i = 0; i < cLabels; i++)
9246 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9247#endif
9248
9249 /*
9250 * Allocate executable memory, copy over the code we've generated.
9251 */
9252 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9253 if (pTbAllocator->pDelayedFreeHead)
9254 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9255
9256 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
9257 AssertReturn(paFinalInstrBuf, pTb);
9258 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9259
9260 /*
9261 * Apply fixups.
9262 */
9263 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9264 uint32_t const cFixups = pReNative->cFixups;
9265 for (uint32_t i = 0; i < cFixups; i++)
9266 {
9267 Assert(paFixups[i].off < off);
9268 Assert(paFixups[i].idxLabel < cLabels);
9269 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9270 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9271 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9272 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9273 switch (paFixups[i].enmType)
9274 {
9275#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9276 case kIemNativeFixupType_Rel32:
9277 Assert(paFixups[i].off + 4 <= off);
9278 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9279 continue;
9280
9281#elif defined(RT_ARCH_ARM64)
9282 case kIemNativeFixupType_RelImm26At0:
9283 {
9284 Assert(paFixups[i].off < off);
9285 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9286 Assert(offDisp >= -262144 && offDisp < 262144);
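 /* Keep the instruction's opcode bits (31:26) and patch in the 26-bit signed displacement. */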
9287 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9288 continue;
9289 }
9290
9291 case kIemNativeFixupType_RelImm19At5:
9292 {
9293 Assert(paFixups[i].off < off);
9294 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9295 Assert(offDisp >= -262144 && offDisp < 262144);
9296 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9297 continue;
9298 }
9299
9300 case kIemNativeFixupType_RelImm14At5:
9301 {
9302 Assert(paFixups[i].off < off);
9303 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9304 Assert(offDisp >= -8192 && offDisp < 8192);
9305 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9306 continue;
9307 }
9308
9309#endif
9310 case kIemNativeFixupType_Invalid:
9311 case kIemNativeFixupType_End:
9312 break;
9313 }
9314 AssertFailed();
9315 }
9316
9317 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9318 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9319
9320 /*
9321 * Convert the translation block.
9322 */
9323 RTMemFree(pTb->Thrd.paCalls);
9324 pTb->Native.paInstructions = paFinalInstrBuf;
9325 pTb->Native.cInstructions = off;
9326 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9327#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9328 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9329 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9330#endif
9331
9332 Assert(pTbAllocator->cThreadedTbs > 0);
9333 pTbAllocator->cThreadedTbs -= 1;
9334 pTbAllocator->cNativeTbs += 1;
9335 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9336
9337#ifdef LOG_ENABLED
9338 /*
9339 * Disassemble to the log if enabled.
9340 */
9341 if (LogIs3Enabled())
9342 {
9343 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9344 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9345# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9346 RTLogFlush(NULL);
9347# endif
9348 }
9349#endif
9350 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9351
9352 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9353 return pTb;
9354}
9355