VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 104210

Last change on this file since 104210 was 104210, checked in by vboxsync, 8 months ago

VMM/IEM: Made iemNativeVarRegisterAcquire automatically convert from immediate to stack so that IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR and friends works with absolute addresses. bugref:10370
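Roughly what this change means, as a minimal self-contained sketch (every name below is a hypothetical illustration, not the real IEM recompiler types or signatures): acquiring a host register for a variable that currently holds an immediate now spills that immediate to a stack slot first, so effective-address helpers such as IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR can update an absolute address like any other stack-backed local.

    #include <stdint.h>
    #include <stdio.h>

    typedef enum { kVarKind_Immediate, kVarKind_Stack } VARKIND;

    typedef struct
    {
        VARKIND  enmKind;
        uint64_t uImmValue;    /* meaningful while enmKind == kVarKind_Immediate */
        int      idxStackSlot; /* meaningful once enmKind == kVarKind_Stack */
    } NATIVEVAR;

    /* Hypothetical stand-in for the acquire path: an immediate-kind variable is
       spilled to a stack slot instead of being rejected, so address arithmetic
       can modify it in place afterwards. */
    static int varRegisterAcquire(NATIVEVAR *pVar)
    {
        if (pVar->enmKind == kVarKind_Immediate)
        {
            pVar->idxStackSlot = 0;           /* pretend a stack slot was allocated */
            pVar->enmKind      = kVarKind_Stack;
        }
        return 7;                             /* pretend host register index */
    }

    int main(void)
    {
        NATIVEVAR Var    = { kVarKind_Immediate, UINT64_C(0x1234), -1 };
        int const idxReg = varRegisterAcquire(&Var);
        printf("kind=%d reg=%d slot=%d imm=%#llx\n",
               (int)Var.enmKind, idxReg, Var.idxStackSlot, (unsigned long long)Var.uImmValue);
        return 0;
    }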

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 409.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104210 2024-04-06 00:38:26Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
70# include "/opt/local/include/capstone/capstone.h"
71#endif
72
73#include "IEMInline.h"
74#include "IEMThreadedFunctions.h"
75#include "IEMN8veRecompiler.h"
76#include "IEMN8veRecompilerEmit.h"
77#include "IEMN8veRecompilerTlbLookup.h"
78#include "IEMNativeFunctions.h"
79
80
81/*
82 * Narrow down configs here to avoid wasting time on unused configs.
83 * Note! Same checks in IEMAllThrdRecompiler.cpp.
84 */
85
86#ifndef IEM_WITH_CODE_TLB
87# error The code TLB must be enabled for the recompiler.
88#endif
89
90#ifndef IEM_WITH_DATA_TLB
91# error The data TLB must be enabled for the recompiler.
92#endif
93
94#ifndef IEM_WITH_SETJMP
95# error The setjmp approach must be enabled for the recompiler.
96#endif
97
98/** @todo eliminate this clang build hack. */
99#if RT_CLANG_PREREQ(4, 0)
100# pragma GCC diagnostic ignored "-Wunused-function"
101#endif
102
103
104/*********************************************************************************************************************************
105* Internal Functions *
106*********************************************************************************************************************************/
107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
108static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
109#endif
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
112 IEMNATIVEGSTREG enmGstReg, uint32_t off);
113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
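    /* idxInstr is the instruction number within the TB at the failing call, so this
       keeps the executed-instruction count honest when the TB is cut short; the
       TB-internal VINF_IEM_REEXEC_BREAK is folded back to VINF_SUCCESS below. */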
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
130
131
132/**
133 * Used by TB code when it wants to raise a \#DE.
134 */
135IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
136{
137 iemRaiseDivideErrorJmp(pVCpu);
138#ifndef _MSC_VER
139 return VINF_IEM_RAISED_XCPT; /* not reached */
140#endif
141}
142
143
144/**
145 * Used by TB code when it wants to raise a \#UD.
146 */
147IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
148{
149 iemRaiseUndefinedOpcodeJmp(pVCpu);
150#ifndef _MSC_VER
151 return VINF_IEM_RAISED_XCPT; /* not reached */
152#endif
153}
154
155
156/**
157 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
158 *
159 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
160 */
161IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
162{
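    /* CR0.EM set or CR4.OSFXSR clear means SSE instructions are not available at
       all (#UD); otherwise the condition that sent us here must have been CR0.TS,
       so #NM is the correct exception. */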
163 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
164 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
165 iemRaiseUndefinedOpcodeJmp(pVCpu);
166 else
167 iemRaiseDeviceNotAvailableJmp(pVCpu);
168#ifndef _MSC_VER
169 return VINF_IEM_RAISED_XCPT; /* not reached */
170#endif
171}
172
173
174/**
175 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
176 *
177 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
178 */
179IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
180{
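    /* AVX requires XCR0 to enable both YMM and SSE state as well as CR4.OSXSAVE;
       if either is missing this is #UD territory, otherwise the failing condition
       was CR0.TS and #NM is due. */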
181 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
182 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
183 iemRaiseUndefinedOpcodeJmp(pVCpu);
184 else
185 iemRaiseDeviceNotAvailableJmp(pVCpu);
186#ifndef _MSC_VER
187 return VINF_IEM_RAISED_XCPT; /* not reached */
188#endif
189}
190
191
192/**
193 * Used by TB code when it wants to raise a \#UD or \#XF related to an SSE/AVX floating point exception.
194 *
195 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
196 */
197IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
198{
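    /* #XF can only be delivered when CR4.OSXMMEEXCPT is set; without it, unmasked
       SIMD floating-point exceptions are reported as #UD instead. */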
199 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
200 iemRaiseSimdFpExceptionJmp(pVCpu);
201 else
202 iemRaiseUndefinedOpcodeJmp(pVCpu);
203#ifndef _MSC_VER
204 return VINF_IEM_RAISED_XCPT; /* not reached */
205#endif
206}
207
208
209/**
210 * Used by TB code when it wants to raise a \#NM.
211 */
212IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
213{
214 iemRaiseDeviceNotAvailableJmp(pVCpu);
215#ifndef _MSC_VER
216 return VINF_IEM_RAISED_XCPT; /* not reached */
217#endif
218}
219
220
221/**
222 * Used by TB code when it wants to raise a \#GP(0).
223 */
224IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
225{
226 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
227#ifndef _MSC_VER
228 return VINF_IEM_RAISED_XCPT; /* not reached */
229#endif
230}
231
232
233/**
234 * Used by TB code when it wants to raise a \#MF.
235 */
236IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
237{
238 iemRaiseMathFaultJmp(pVCpu);
239#ifndef _MSC_VER
240 return VINF_IEM_RAISED_XCPT; /* not reached */
241#endif
242}
243
244
245/**
246 * Used by TB code when it wants to raise a \#XF.
247 */
248IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
249{
250 iemRaiseSimdFpExceptionJmp(pVCpu);
251#ifndef _MSC_VER
252 return VINF_IEM_RAISED_XCPT; /* not reached */
253#endif
254}
255
256
257/**
258 * Used by TB code when detecting opcode changes.
259 * @see iemThreadedFuncWorkerObsoleteTb
260 */
261IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
262{
263 /* We set fSafeToFree to false because we're being called in the context
264 of a TB callback function, which for native TBs means we cannot release
265 the executable memory till we've returned our way back to iemTbExec, as
266 that return path goes via the native code generated for the TB. */
267 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
268 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
269 return VINF_IEM_REEXEC_BREAK;
270}
271
272
273/**
274 * Used by TB code when we need to switch to a TB with CS.LIM checking.
275 */
276IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
277{
278 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
279 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
280 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
281 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
282 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
283 return VINF_IEM_REEXEC_BREAK;
284}
285
286
287/**
288 * Used by TB code when we missed a PC check after a branch.
289 */
290IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
291{
292 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
293 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
294 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
295 pVCpu->iem.s.pbInstrBuf));
296 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
297 return VINF_IEM_REEXEC_BREAK;
298}
299
300
301
302/*********************************************************************************************************************************
303* Helpers: Segmented memory fetches and stores. *
304*********************************************************************************************************************************/
305
306/**
307 * Used by TB code to load unsigned 8-bit data w/ segmentation.
308 */
309IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
310{
311#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
312 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
313#else
314 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
315#endif
316}
317
318
319/**
320 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
321 * to 16 bits.
322 */
323IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
324{
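    /* The cast chain sign-extends the byte to 16 bits first, then zero-extends the
       result to 64 bits, so the caller sees a clean 16-bit value in the return register. */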
325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
326 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
327#else
328 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
329#endif
330}
331
332
333/**
334 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
335 * to 32 bits.
336 */
337IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
338{
339#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
340 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
341#else
342 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
343#endif
344}
345
346/**
347 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
348 * to 64 bits.
349 */
350IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
351{
352#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
353 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
354#else
355 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
356#endif
357}
358
359
360/**
361 * Used by TB code to load unsigned 16-bit data w/ segmentation.
362 */
363IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
364{
365#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
366 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
367#else
368 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
369#endif
370}
371
372
373/**
374 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
375 * to 32 bits.
376 */
377IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
378{
379#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
380 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
381#else
382 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
383#endif
384}
385
386
387/**
388 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
389 * to 64 bits.
390 */
391IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
392{
393#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
394 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
395#else
396 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
397#endif
398}
399
400
401/**
402 * Used by TB code to load unsigned 32-bit data w/ segmentation.
403 */
404IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
405{
406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
407 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
408#else
409 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
410#endif
411}
412
413
414/**
415 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
416 * to 64 bits.
417 */
418IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
419{
420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
421 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
422#else
423 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
424#endif
425}
426
427
428/**
429 * Used by TB code to load unsigned 64-bit data w/ segmentation.
430 */
431IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
432{
433#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
434 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
435#else
436 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
437#endif
438}
439
440
441#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
442/**
443 * Used by TB code to load 128-bit data w/ segmentation.
444 */
445IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
446{
447#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
448 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
449#else
450 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
451#endif
452}
453
454
455/**
456 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment.
457 */
458IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
459{
460#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
461 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
462#else
463 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
464#endif
465}
466
467
468/**
469 * Used by TB code to load 128-bit data w/ segmentation, skipping alignment checks.
470 */
471IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
472{
473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
474 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
475#else
476 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
477#endif
478}
479
480
481/**
482 * Used by TB code to load 256-bit data w/ segmentation, skipping alignment checks.
483 */
484IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
485{
486#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
487 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
488#else
489 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
490#endif
491}
492
493
494/**
495 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment.
496 */
497IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
498{
499#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
500 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
501#else
502 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
503#endif
504}
505#endif
506
507
508/**
509 * Used by TB code to store unsigned 8-bit data w/ segmentation.
510 */
511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
512{
513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
514 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
515#else
516 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
517#endif
518}
519
520
521/**
522 * Used by TB code to store unsigned 16-bit data w/ segmentation.
523 */
524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
525{
526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
527 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
528#else
529 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
530#endif
531}
532
533
534/**
535 * Used by TB code to store unsigned 32-bit data w/ segmentation.
536 */
537IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
538{
539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
540 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
541#else
542 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
543#endif
544}
545
546
547/**
548 * Used by TB code to store unsigned 64-bit data w/ segmentation.
549 */
550IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
551{
552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
553 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
554#else
555 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
556#endif
557}
558
559
560#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
561/**
562 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment.
563 */
564IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
565{
566#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
567 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
568#else
569 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
570#endif
571}
572
573
574/**
575 * Used by TB code to store unsigned 128-bit data w/ segmentation, skipping alignment checks.
576 */
577IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
578{
579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
580 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
581#else
582 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
583#endif
584}
585
586
587/**
588 * Used by TB code to store unsigned 256-bit data w/ segmentation, skipping alignment checks.
589 */
590IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
591{
592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
593 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
594#else
595 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
596#endif
597}
598
599
600/**
601 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment.
602 */
603IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
604{
605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
606 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
607#else
608 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
609#endif
610}
611#endif
612
613
614
615/**
616 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
617 */
618IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
619{
620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
621 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
622#else
623 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
624#endif
625}
626
627
628/**
629 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
630 */
631IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
632{
633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
634 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
635#else
636 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
637#endif
638}
639
640
641/**
642 * Used by TB code to store a 32-bit selector value onto a generic stack.
643 *
644 * Intel CPUs don't write a whole dword here, hence the special function.
645 */
646IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
647{
648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
649 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
650#else
651 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
652#endif
653}
654
655
656/**
657 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
658 */
659IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
660{
661#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
662 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
663#else
664 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
665#endif
666}
667
668
669/**
670 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
671 */
672IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
673{
674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
675 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
676#else
677 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
678#endif
679}
680
681
682/**
683 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
684 */
685IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
686{
687#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
688 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
689#else
690 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
691#endif
692}
693
694
695/**
696 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
697 */
698IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
699{
700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
701 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
702#else
703 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
704#endif
705}
706
707
708
709/*********************************************************************************************************************************
710* Helpers: Flat memory fetches and stores. *
711*********************************************************************************************************************************/
712
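/* Note: the helpers in this section reuse the segmented iemMem*SafeJmp workers by passing
   UINT8_MAX as the segment register index, which IEM uses to mean the address is already
   flat/linear and no segment base or limit needs to be applied. */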
713/**
714 * Used by TB code to load unsigned 8-bit data w/ flat address.
715 * @note Zero extending the value to 64-bit to simplify assembly.
716 */
717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
721#else
722 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
729 * to 16 bits.
730 * @note Zero extending the value to 64-bit to simplify assembly.
731 */
732IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
733{
734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
735 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
736#else
737 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
738#endif
739}
740
741
742/**
743 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
744 * to 32 bits.
745 * @note Zero extending the value to 64-bit to simplify assembly.
746 */
747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
748{
749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
750 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
751#else
752 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
753#endif
754}
755
756
757/**
758 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
759 * to 64 bits.
760 */
761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
762{
763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
764 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
765#else
766 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
767#endif
768}
769
770
771/**
772 * Used by TB code to load unsigned 16-bit data w/ flat address.
773 * @note Zero extending the value to 64-bit to simplify assembly.
774 */
775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
776{
777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
778 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
779#else
780 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
781#endif
782}
783
784
785/**
786 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
787 * to 32 bits.
788 * @note Zero extending the value to 64-bit to simplify assembly.
789 */
790IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
791{
792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
793 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
794#else
795 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
796#endif
797}
798
799
800/**
801 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
802 * to 64 bits.
803 * @note Zero extending the value to 64-bit to simplify assembly.
804 */
805IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
806{
807#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
808 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
809#else
810 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
811#endif
812}
813
814
815/**
816 * Used by TB code to load unsigned 32-bit data w/ flat address.
817 * @note Zero extending the value to 64-bit to simplify assembly.
818 */
819IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
820{
821#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
822 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
823#else
824 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
825#endif
826}
827
828
829/**
830 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
831 * to 64 bits.
832 * @note Zero extending the value to 64-bit to simplify assembly.
833 */
834IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
835{
836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
837 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
838#else
839 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
840#endif
841}
842
843
844/**
845 * Used by TB code to load unsigned 64-bit data w/ flat address.
846 */
847IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
848{
849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
850 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
851#else
852 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
853#endif
854}
855
856
857#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
858/**
859 * Used by TB code to load unsigned 128-bit data w/ flat address.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
864 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
865#else
866 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
867#endif
868}
869
870
871/**
872 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
877 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
878#else
879 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
880#endif
881}
882
883
884/**
885 * Used by TB code to load unsigned 128-bit data w/ flat address, skipping alignment checks.
886 */
887IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
888{
889#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
890 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
891#else
892 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
893#endif
894}
895
896
897/**
898 * Used by TB code to load unsigned 256-bit data w/ flat address, skipping alignment checks.
899 */
900IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
901{
902#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
903 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
904#else
905 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
906#endif
907}
908
909
910/**
911 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment.
912 */
913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
914{
915#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
916 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
917#else
918 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
919#endif
920}
921#endif
922
923
924/**
925 * Used by TB code to store unsigned 8-bit data w/ flat address.
926 */
927IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
928{
929#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
930 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
931#else
932 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
933#endif
934}
935
936
937/**
938 * Used by TB code to store unsigned 16-bit data w/ flat address.
939 */
940IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
941{
942#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
943 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
944#else
945 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
946#endif
947}
948
949
950/**
951 * Used by TB code to store unsigned 32-bit data w/ flat address.
952 */
953IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
954{
955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
956 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
957#else
958 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
959#endif
960}
961
962
963/**
964 * Used by TB code to store unsigned 64-bit data w/ flat address.
965 */
966IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
967{
968#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
969 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
970#else
971 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
972#endif
973}
974
975
976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
977/**
978 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment.
979 */
980IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
981{
982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
983 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
984#else
985 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
986#endif
987}
988
989
990/**
991 * Used by TB code to store unsigned 128-bit data w/ flat address, skipping alignment checks.
992 */
993IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
994{
995#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
996 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
997#else
998 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
999#endif
1000}
1001
1002
1003/**
1004 * Used by TB code to store unsigned 256-bit data w/ flat address, skipping alignment checks.
1005 */
1006IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1007{
1008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1009 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1010#else
1011 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1012#endif
1013}
1014
1015
1016/**
1017 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1022 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1023#else
1024 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1025#endif
1026}
1027#endif
1028
1029
1030
1031/**
1032 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1033 */
1034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1035{
1036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1037 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1038#else
1039 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1040#endif
1041}
1042
1043
1044/**
1045 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1050 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1051#else
1052 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to store a segment selector value onto a flat stack.
1059 *
1060 * Intel CPUs don't write a whole dword here, hence the special function.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1065 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1066#else
1067 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1074 */
1075IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1076{
1077#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1078 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1079#else
1080 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1081#endif
1082}
1083
1084
1085/**
1086 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1087 */
1088IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1089{
1090#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1091 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1092#else
1093 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1094#endif
1095}
1096
1097
1098/**
1099 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1100 */
1101IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1102{
1103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1104 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1105#else
1106 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1107#endif
1108}
1109
1110
1111/**
1112 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1113 */
1114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1115{
1116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1117 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1118#else
1119 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1120#endif
1121}
1122
1123
1124
1125/*********************************************************************************************************************************
1126* Helpers: Segmented memory mapping. *
1127*********************************************************************************************************************************/
1128
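/* Note: each mapping helper returns a host pointer for direct guest-memory access and fills
   *pbUnmapInfo with the token that the TB code later hands to the commit-and-unmap helpers
   towards the end of this file. */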
1129/**
1130 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1131 * segmentation.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1134 RTGCPTR GCPtrMem, uint8_t iSegReg))
1135{
1136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1137 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1138#else
1139 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1140#endif
1141}
1142
1143
1144/**
1145 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1146 */
1147IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1148 RTGCPTR GCPtrMem, uint8_t iSegReg))
1149{
1150#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1151 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1152#else
1153 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1154#endif
1155}
1156
1157
1158/**
1159 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1160 */
1161IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1162 RTGCPTR GCPtrMem, uint8_t iSegReg))
1163{
1164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1165 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1166#else
1167 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1168#endif
1169}
1170
1171
1172/**
1173 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1174 */
1175IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1176 RTGCPTR GCPtrMem, uint8_t iSegReg))
1177{
1178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1179 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1180#else
1181 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1182#endif
1183}
1184
1185
1186/**
1187 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1188 * segmentation.
1189 */
1190IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1191 RTGCPTR GCPtrMem, uint8_t iSegReg))
1192{
1193#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1194 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1195#else
1196 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1197#endif
1198}
1199
1200
1201/**
1202 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1203 */
1204IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1205 RTGCPTR GCPtrMem, uint8_t iSegReg))
1206{
1207#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1208 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1209#else
1210 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1211#endif
1212}
1213
1214
1215/**
1216 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1217 */
1218IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1219 RTGCPTR GCPtrMem, uint8_t iSegReg))
1220{
1221#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1222 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1223#else
1224 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1225#endif
1226}
1227
1228
1229/**
1230 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1231 */
1232IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1233 RTGCPTR GCPtrMem, uint8_t iSegReg))
1234{
1235#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1236 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1237#else
1238 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1239#endif
1240}
1241
1242
1243/**
1244 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1245 * segmentation.
1246 */
1247IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1248 RTGCPTR GCPtrMem, uint8_t iSegReg))
1249{
1250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1251 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1252#else
1253 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1254#endif
1255}
1256
1257
1258/**
1259 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1260 */
1261IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1262 RTGCPTR GCPtrMem, uint8_t iSegReg))
1263{
1264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1265 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1266#else
1267 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1268#endif
1269}
1270
1271
1272/**
1273 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1274 */
1275IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1276 RTGCPTR GCPtrMem, uint8_t iSegReg))
1277{
1278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1279 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1280#else
1281 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1282#endif
1283}
1284
1285
1286/**
1287 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1288 */
1289IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1290 RTGCPTR GCPtrMem, uint8_t iSegReg))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1293 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1294#else
1295 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1296#endif
1297}
1298
1299
1300/**
1301 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1302 * segmentation.
1303 */
1304IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1305 RTGCPTR GCPtrMem, uint8_t iSegReg))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1308 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1309#else
1310 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1319 RTGCPTR GCPtrMem, uint8_t iSegReg))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1322 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1323#else
1324 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1331 */
1332IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1333 RTGCPTR GCPtrMem, uint8_t iSegReg))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1336 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1337#else
1338 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1347 RTGCPTR GCPtrMem, uint8_t iSegReg))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1350 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1351#else
1352 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1361 RTGCPTR GCPtrMem, uint8_t iSegReg))
1362{
1363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1364 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1365#else
1366 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1367#endif
1368}
1369
1370
1371/**
1372 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1373 */
1374IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1375 RTGCPTR GCPtrMem, uint8_t iSegReg))
1376{
1377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1378 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1379#else
1380 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1381#endif
1382}
1383
1384
1385/**
1386 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1387 * segmentation.
1388 */
1389IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1390 RTGCPTR GCPtrMem, uint8_t iSegReg))
1391{
1392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1393 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1394#else
1395 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1396#endif
1397}
1398
1399
1400/**
1401 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1402 */
1403IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1404 RTGCPTR GCPtrMem, uint8_t iSegReg))
1405{
1406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1407 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1408#else
1409 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#endif
1411}
1412
1413
1414/**
1415 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1416 */
1417IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1418 RTGCPTR GCPtrMem, uint8_t iSegReg))
1419{
1420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1421 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1422#else
1423 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#endif
1425}
1426
1427
1428/**
1429 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1430 */
1431IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1432 RTGCPTR GCPtrMem, uint8_t iSegReg))
1433{
1434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1435 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1436#else
1437 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#endif
1439}
1440
1441
1442/*********************************************************************************************************************************
1443* Helpers: Flat memory mapping. *
1444*********************************************************************************************************************************/
1445
1446/**
1447 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1448 * address.
1449 */
1450IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1451{
1452#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1453 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1454#else
1455 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1456#endif
1457}
1458
1459
1460/**
1461 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1467#else
1468 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1480#else
1481 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1490{
1491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1492 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1493#else
1494 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1495#endif
1496}
1497
1498
1499/**
1500 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1501 * address.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1504{
1505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1506 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1507#else
1508 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1509#endif
1510}
1511
1512
1513/**
1514 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1515 */
1516IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1517{
1518#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1519 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1520#else
1521 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1522#endif
1523}
1524
1525
1526/**
1527 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1528 */
1529IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1530{
1531#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1532 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1533#else
1534 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1535#endif
1536}
1537
1538
1539/**
1540 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1541 */
1542IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1543{
1544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1545 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1546#else
1547 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1548#endif
1549}
1550
1551
1552/**
1553 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1554 * address.
1555 */
1556IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1557{
1558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1559 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1560#else
1561 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1562#endif
1563}
1564
1565
1566/**
1567 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1568 */
1569IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1570{
1571#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1572 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1573#else
1574 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1575#endif
1576}
1577
1578
1579/**
1580 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1581 */
1582IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1583{
1584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1585 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1586#else
1587 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1588#endif
1589}
1590
1591
1592/**
1593 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1594 */
1595IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1596{
1597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1598 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1599#else
1600 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1607 * address.
1608 */
1609IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1610{
1611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1612 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1613#else
1614 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1615#endif
1616}
1617
1618
1619/**
1620 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1621 */
1622IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1623{
1624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1625 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1626#else
1627 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1628#endif
1629}
1630
1631
1632/**
1633 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1634 */
1635IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1636{
1637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1638 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1639#else
1640 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1641#endif
1642}
1643
1644
1645/**
1646 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1647 */
1648IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1649{
1650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1651 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1652#else
1653 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1665#else
1666 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1675{
1676#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1677 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1678#else
1679 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1680#endif
1681}
1682
1683
1684/**
1685 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1686 * address.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1689{
1690#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1691 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1692#else
1693 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1694#endif
1695}
1696
1697
1698/**
1699 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1704 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1705#else
1706 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1707#endif
1708}
1709
1710
1711/**
1712 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1713 */
1714IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1715{
1716#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1717 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1718#else
1719 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1720#endif
1721}
1722
1723
1724/**
1725 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1726 */
1727IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1728{
1729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1730 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1731#else
1732 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1733#endif
1734}
1735
1736
1737/*********************************************************************************************************************************
1738* Helpers: Commit, rollback & unmap *
1739*********************************************************************************************************************************/
1740
1741/**
1742 * Used by TB code to commit and unmap an atomic read-write memory mapping.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1745{
1746 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
1747}
1748
1749
1750/**
1751 * Used by TB code to commit and unmap a read-write memory mapping.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1754{
1755 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
1756}
1757
1758
1759/**
1760 * Used by TB code to commit and unmap a write-only memory mapping.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1763{
1764 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
1765}
1766
1767
1768/**
1769 * Used by TB code to commit and unmap a read-only memory mapping.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1772{
1773 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
1774}
1775
1776
1777/**
1778 * Reinitializes the native recompiler state.
1779 *
1780 * Called before starting a new recompile job.
1781 */
1782static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1783{
1784 pReNative->cLabels = 0;
1785 pReNative->bmLabelTypes = 0;
1786 pReNative->cFixups = 0;
1787#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1788 pReNative->pDbgInfo->cEntries = 0;
1789 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
1790#endif
1791 pReNative->pTbOrg = pTb;
1792 pReNative->cCondDepth = 0;
1793 pReNative->uCondSeqNo = 0;
1794 pReNative->uCheckIrqSeqNo = 0;
1795 pReNative->uTlbSeqNo = 0;
1796
1797#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1798 pReNative->Core.offPc = 0;
1799 pReNative->Core.cInstrPcUpdateSkipped = 0;
1800#endif
1801#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1802 pReNative->fSimdRaiseXcptChecksEmitted = 0;
1803#endif
1804 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1805#if IEMNATIVE_HST_GREG_COUNT < 32
1806 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1807#endif
1808 ;
1809 pReNative->Core.bmHstRegsWithGstShadow = 0;
1810 pReNative->Core.bmGstRegShadows = 0;
1811#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1812 pReNative->Core.bmGstRegShadowDirty = 0;
1813#endif
1814 pReNative->Core.bmVars = 0;
1815 pReNative->Core.bmStack = 0;
1816 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1817 pReNative->Core.u64ArgVars = UINT64_MAX;
1818
1819 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
1820 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1821 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1822 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1823 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1824 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1825 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1826 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
1827 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
1828 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
1829 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
1830 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
1831 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
1832 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
1833 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
1834 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
1835 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
1836 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
1837
1838 /* Full host register reinit: */
1839 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1840 {
1841 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1842 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1843 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1844 }
1845
1846 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1847 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1848#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1849 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1850#endif
1851#ifdef IEMNATIVE_REG_FIXED_TMP0
1852 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1853#endif
1854#ifdef IEMNATIVE_REG_FIXED_TMP1
1855 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
1856#endif
1857#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1858 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
1859#endif
1860 );
1861 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1862 {
1863 fRegs &= ~RT_BIT_32(idxReg);
1864 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1865 }
1866
1867 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1868#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1869 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1870#endif
1871#ifdef IEMNATIVE_REG_FIXED_TMP0
1872 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1873#endif
1874#ifdef IEMNATIVE_REG_FIXED_TMP1
1875 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
1876#endif
1877#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1878 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
1879#endif
1880
1881#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1882 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
1883# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
1884 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
1885# endif
1886 ;
1887 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
1888 pReNative->Core.bmGstSimdRegShadows = 0;
1889 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
1890 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
1891
1892 /* Full host register reinit: */
1893 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
1894 {
1895 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
1896 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
1897 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
1898 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
1899 }
1900
1901 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
1902 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1903 {
1904 fRegs &= ~RT_BIT_32(idxReg);
1905 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1906 }
1907
1908#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
1909 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1910#endif
1911
1912#endif
1913
1914 return pReNative;
1915}
1916
1917
1918/**
1919 * Allocates and initializes the native recompiler state.
1920 *
1921 * This is called the first time an EMT wants to recompile something.
1922 *
1923 * @returns Pointer to the new recompiler state.
1924 * @param pVCpu The cross context virtual CPU structure of the calling
1925 * thread.
1926 * @param pTb The TB that's about to be recompiled.
1927 * @thread EMT(pVCpu)
1928 */
1929static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1930{
1931 VMCPU_ASSERT_EMT(pVCpu);
1932
1933 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1934 AssertReturn(pReNative, NULL);
1935
1936 /*
1937 * Try allocate all the buffers and stuff we need.
1938 */
1939 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1940 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1941 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1942#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1943 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1944#endif
1945 if (RT_LIKELY( pReNative->pInstrBuf
1946 && pReNative->paLabels
1947 && pReNative->paFixups)
1948#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1949 && pReNative->pDbgInfo
1950#endif
1951 )
1952 {
1953 /*
1954 * Set the buffer & array sizes on success.
1955 */
1956 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1957 pReNative->cLabelsAlloc = _8K;
1958 pReNative->cFixupsAlloc = _16K;
1959#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1960 pReNative->cDbgInfoAlloc = _16K;
1961#endif
1962
1963 /* Other constant stuff: */
1964 pReNative->pVCpu = pVCpu;
1965
1966 /*
1967 * Done, just need to save it and reinit it.
1968 */
1969 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1970 return iemNativeReInit(pReNative, pTb);
1971 }
1972
1973 /*
1974 * Failed. Cleanup and return.
1975 */
1976 AssertFailed();
1977 RTMemFree(pReNative->pInstrBuf);
1978 RTMemFree(pReNative->paLabels);
1979 RTMemFree(pReNative->paFixups);
1980#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1981 RTMemFree(pReNative->pDbgInfo);
1982#endif
1983 RTMemFree(pReNative);
1984 return NULL;
1985}
1986
1987
1988/**
1989 * Creates a label
1990 *
1991 * If the label does not yet have a defined position,
1992 * call iemNativeLabelDefine() later to set it.
1993 *
1994 * @returns Label ID. Throws VBox status code on failure, so no need to check
1995 * the return value.
1996 * @param pReNative The native recompile state.
1997 * @param enmType The label type.
1998 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1999 * label is not yet defined (default).
2000 * @param uData Data associated with the label. Only applicable to
2001 * certain types of labels. Default is zero.
2002 */
2003DECL_HIDDEN_THROW(uint32_t)
2004iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2005 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2006{
2007 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2008
2009 /*
2010 * Locate existing label definition.
2011 *
2012 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2013 * and uData is zero.
2014 */
2015 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2016 uint32_t const cLabels = pReNative->cLabels;
2017 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2018#ifndef VBOX_STRICT
2019 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2020 && offWhere == UINT32_MAX
2021 && uData == 0
2022#endif
2023 )
2024 {
2025#ifndef VBOX_STRICT
2026 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2027 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2028 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2029 if (idxLabel < pReNative->cLabels)
2030 return idxLabel;
2031#else
2032 for (uint32_t i = 0; i < cLabels; i++)
2033 if ( paLabels[i].enmType == enmType
2034 && paLabels[i].uData == uData)
2035 {
2036 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2037 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2038 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2039 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2040 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2041 return i;
2042 }
2043 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2044 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2045#endif
2046 }
2047
2048 /*
2049 * Make sure we've got room for another label.
2050 */
2051 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2052 { /* likely */ }
2053 else
2054 {
2055 uint32_t cNew = pReNative->cLabelsAlloc;
2056 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2057 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2058 cNew *= 2;
2059 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2060 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2061 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2062 pReNative->paLabels = paLabels;
2063 pReNative->cLabelsAlloc = cNew;
2064 }
2065
2066 /*
2067 * Define a new label.
2068 */
2069 paLabels[cLabels].off = offWhere;
2070 paLabels[cLabels].enmType = enmType;
2071 paLabels[cLabels].uData = uData;
2072 pReNative->cLabels = cLabels + 1;
2073
2074 Assert((unsigned)enmType < 64);
2075 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2076
2077 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2078 {
2079 Assert(uData == 0);
2080 pReNative->aidxUniqueLabels[enmType] = cLabels;
2081 }
2082
2083 if (offWhere != UINT32_MAX)
2084 {
2085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2086 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2087 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2088#endif
2089 }
2090 return cLabels;
2091}
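/*
 * A minimal usage sketch of the label API (names like enmLabelType, enmFixupType,
 * offJmpFixup and offTarget are illustrative placeholders): a label is forward
 * declared, a branch fixup is recorded against it with iemNativeAddFixup(), and
 * its position is supplied later via iemNativeLabelDefine():
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      ...
 *      iemNativeAddFixup(pReNative, offJmpFixup, idxLabel, enmFixupType);
 *      ...
 *      iemNativeLabelDefine(pReNative, idxLabel, offTarget);
 */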
2092
2093
2094/**
2095 * Defines the location of an existing label.
2096 *
2097 * @param pReNative The native recompile state.
2098 * @param idxLabel The label to define.
2099 * @param offWhere The position.
2100 */
2101DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2102{
2103 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2104 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2105 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2106 pLabel->off = offWhere;
2107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2108 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2109 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2110#endif
2111}
2112
2113
2114/**
2115 * Looks up a label.
2116 *
2117 * @returns Label ID if found, UINT32_MAX if not.
2118 */
2119static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2120 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2121{
2122 Assert((unsigned)enmType < 64);
2123 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2124 {
2125 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2126 return pReNative->aidxUniqueLabels[enmType];
2127
2128 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2129 uint32_t const cLabels = pReNative->cLabels;
2130 for (uint32_t i = 0; i < cLabels; i++)
2131 if ( paLabels[i].enmType == enmType
2132 && paLabels[i].uData == uData
2133 && ( paLabels[i].off == offWhere
2134 || offWhere == UINT32_MAX
2135 || paLabels[i].off == UINT32_MAX))
2136 return i;
2137 }
2138 return UINT32_MAX;
2139}
2140
2141
2142/**
2143 * Adds a fixup.
2144 *
2145 * @throws VBox status code (int) on failure.
2146 * @param pReNative The native recompile state.
2147 * @param offWhere The instruction offset of the fixup location.
2148 * @param idxLabel The target label ID for the fixup.
2149 * @param enmType The fixup type.
2150 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2151 */
2152DECL_HIDDEN_THROW(void)
2153iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2154 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2155{
2156 Assert(idxLabel <= UINT16_MAX);
2157 Assert((unsigned)enmType <= UINT8_MAX);
2158#ifdef RT_ARCH_ARM64
2159 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2160 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2161 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2162#endif
2163
2164 /*
2165 * Make sure we've room.
2166 */
2167 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2168 uint32_t const cFixups = pReNative->cFixups;
2169 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2170 { /* likely */ }
2171 else
2172 {
2173 uint32_t cNew = pReNative->cFixupsAlloc;
2174 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2175 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2176 cNew *= 2;
2177 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2178 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2179 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2180 pReNative->paFixups = paFixups;
2181 pReNative->cFixupsAlloc = cNew;
2182 }
2183
2184 /*
2185 * Add the fixup.
2186 */
2187 paFixups[cFixups].off = offWhere;
2188 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2189 paFixups[cFixups].enmType = enmType;
2190 paFixups[cFixups].offAddend = offAddend;
2191 pReNative->cFixups = cFixups + 1;
2192}
2193
2194
2195/**
2196 * Slow code path for iemNativeInstrBufEnsure.
2197 */
2198DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2199{
2200 /* Double the buffer size till we meet the request. */
2201 uint32_t cNew = pReNative->cInstrBufAlloc;
2202 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2203 do
2204 cNew *= 2;
2205 while (cNew < off + cInstrReq);
2206
2207 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2208#ifdef RT_ARCH_ARM64
2209 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2210#else
2211 uint32_t const cbMaxInstrBuf = _2M;
2212#endif
2213 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2214
2215 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2216 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2217
2218#ifdef VBOX_STRICT
2219 pReNative->offInstrBufChecked = off + cInstrReq;
2220#endif
2221 pReNative->cInstrBufAlloc = cNew;
2222 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2223}
2224
2225#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2226
2227/**
2228 * Grows the static debug info array used during recompilation.
2229 *
2230 * @returns Pointer to the new debug info block; throws VBox status code on
2231 * failure, so no need to check the return value.
2232 */
2233DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2234{
2235 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2236 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2237 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2238 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2239 pReNative->pDbgInfo = pDbgInfo;
2240 pReNative->cDbgInfoAlloc = cNew;
2241 return pDbgInfo;
2242}
2243
2244
2245/**
2246 * Adds a new debug info uninitialized entry, returning the pointer to it.
2247 */
2248DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2249{
2250 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2251 { /* likely */ }
2252 else
2253 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2254 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2255}
2256
2257
2258/**
2259 * Debug Info: Adds a native offset record, if necessary.
2260 */
2261DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2262{
2263 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2264
2265 /*
2266 * Do we need this one?
2267 */
2268 uint32_t const offPrev = pDbgInfo->offNativeLast;
2269 if (offPrev == off)
2270 return;
2271 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2272
2273 /*
2274 * Add it.
2275 */
2276 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2277 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2278 pEntry->NativeOffset.offNative = off;
2279 pDbgInfo->offNativeLast = off;
2280}
2281
2282
2283/**
2284 * Debug Info: Record info about a label.
2285 */
2286static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2287{
2288 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2289 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2290 pEntry->Label.uUnused = 0;
2291 pEntry->Label.enmLabel = (uint8_t)enmType;
2292 pEntry->Label.uData = uData;
2293}
2294
2295
2296/**
2297 * Debug Info: Record info about a threaded call.
2298 */
2299static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2300{
2301 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2302 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2303 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2304 pEntry->ThreadedCall.uUnused = 0;
2305 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2306}
2307
2308
2309/**
2310 * Debug Info: Record info about a new guest instruction.
2311 */
2312static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2313{
2314 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2315 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2316 pEntry->GuestInstruction.uUnused = 0;
2317 pEntry->GuestInstruction.fExec = fExec;
2318}
2319
2320
2321/**
2322 * Debug Info: Record info about guest register shadowing.
2323 */
2324DECL_HIDDEN_THROW(void)
2325iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2326 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2327{
2328 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2329 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2330 pEntry->GuestRegShadowing.uUnused = 0;
2331 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2332 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2333 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2334#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2335 Assert( idxHstReg != UINT8_MAX
2336 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2337#endif
2338}
2339
2340
2341# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2342/**
2343 * Debug Info: Record info about guest SIMD register shadowing.
2344 */
2345DECL_HIDDEN_THROW(void)
2346iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2347 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2348{
2349 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2350 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2351 pEntry->GuestSimdRegShadowing.uUnused = 0;
2352 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2353 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2354 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2355}
2356# endif
2357
2358
2359# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2360/**
2361 * Debug Info: Record info about delayed RIP updates.
2362 */
2363DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2364{
2365 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2366 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2367 pEntry->DelayedPcUpdate.offPc = offPc;
2368 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2369}
2370# endif
2371
2372# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2373
2374/**
2375 * Debug Info: Record info about a dirty guest register.
2376 */
2377DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2378 uint8_t idxGstReg, uint8_t idxHstReg)
2379{
2380 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2381 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2382 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2383 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2384 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2385}
2386
2387
2388/**
2389 * Debug Info: Record info about a dirty guest register writeback operation.
2390 */
2391DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2392{
2393 unsigned const cBitsGstRegMask = 25;
2394 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
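    /* The 64-bit dirty mask is encoded as up to three debug info entries, each
       holding 25 bits of the mask in fGstReg with cShift giving the 25-bit block
       index.  For example, a mask with bits 3 and 30 set becomes one entry with
       cShift=0 / fGstReg=RT_BIT_32(3) and one with cShift=1 / fGstReg=RT_BIT_32(5). */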
2395
2396 /* The first block of 25 bits: */
2397 if (fGstReg & fGstRegMask)
2398 {
2399 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2400 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2401 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2402 pEntry->GuestRegWriteback.cShift = 0;
2403 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2404 fGstReg &= ~(uint64_t)fGstRegMask;
2405 if (!fGstReg)
2406 return;
2407 }
2408
2409 /* The second block of 25 bits: */
2410 fGstReg >>= cBitsGstRegMask;
2411 if (fGstReg & fGstRegMask)
2412 {
2413 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2414 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2415 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2416 pEntry->GuestRegWriteback.cShift = 1;
2417 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2418 fGstReg &= ~(uint64_t)fGstRegMask;
2419 if (!fGstReg)
2420 return;
2421 }
2422
2423 /* The last block with 14 bits: */
2424 fGstReg >>= cBitsGstRegMask;
2425 Assert(fGstReg & fGstRegMask);
2426 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2427 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2428 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2429 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2430 pEntry->GuestRegWriteback.cShift = 2;
2431 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2432}
2433
2434# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2435
2436#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2437
2438
2439/*********************************************************************************************************************************
2440* Register Allocator *
2441*********************************************************************************************************************************/
2442
2443/**
2444 * Register parameter indexes (indexed by argument number).
2445 */
2446DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2447{
2448 IEMNATIVE_CALL_ARG0_GREG,
2449 IEMNATIVE_CALL_ARG1_GREG,
2450 IEMNATIVE_CALL_ARG2_GREG,
2451 IEMNATIVE_CALL_ARG3_GREG,
2452#if defined(IEMNATIVE_CALL_ARG4_GREG)
2453 IEMNATIVE_CALL_ARG4_GREG,
2454# if defined(IEMNATIVE_CALL_ARG5_GREG)
2455 IEMNATIVE_CALL_ARG5_GREG,
2456# if defined(IEMNATIVE_CALL_ARG6_GREG)
2457 IEMNATIVE_CALL_ARG6_GREG,
2458# if defined(IEMNATIVE_CALL_ARG7_GREG)
2459 IEMNATIVE_CALL_ARG7_GREG,
2460# endif
2461# endif
2462# endif
2463#endif
2464};
2465AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2466
2467/**
2468 * Call register masks indexed by argument count.
2469 */
2470DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2471{
2472 0,
2473 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2474 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2475 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2476 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2477 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2478#if defined(IEMNATIVE_CALL_ARG4_GREG)
2479 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2480 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2481# if defined(IEMNATIVE_CALL_ARG5_GREG)
2482 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2483 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2484# if defined(IEMNATIVE_CALL_ARG6_GREG)
2485 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2486 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2487 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2488# if defined(IEMNATIVE_CALL_ARG7_GREG)
2489 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2490 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2491 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2492# endif
2493# endif
2494# endif
2495#endif
2496};
2497
2498#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2499/**
2500 * BP offset of the stack argument slots.
2501 *
2502 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2503 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2504 */
2505DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2506{
2507 IEMNATIVE_FP_OFF_STACK_ARG0,
2508# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2509 IEMNATIVE_FP_OFF_STACK_ARG1,
2510# endif
2511# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2512 IEMNATIVE_FP_OFF_STACK_ARG2,
2513# endif
2514# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2515 IEMNATIVE_FP_OFF_STACK_ARG3,
2516# endif
2517};
2518AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
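/* Example: the BP displacement for call argument iArg, where
   iArg >= IEMNATIVE_CALL_ARG_GREG_COUNT, is
   g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT]. */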
2519#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2520
2521/**
2522 * Info about shadowed guest register values.
2523 * @see IEMNATIVEGSTREG
2524 */
2525DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2526{
2527#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2528 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2529 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2530 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2531 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2532 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2533 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2534 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2535 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2536 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2537 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2538 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2539 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2540 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2541 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2542 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2543 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2544 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2545 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2546 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2547 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2548 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2549 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2550 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2551 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2552 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2553 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2554 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2555 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2556 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2557 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2558 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2559 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2560 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2561 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2562 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2563 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2564 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2565 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2566 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2567 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2568 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2569 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2570 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2571 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2572 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2573 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2574 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2575 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2576#undef CPUMCTX_OFF_AND_SIZE
2577};
2578AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2579
2580
2581/** Host CPU general purpose register names. */
2582DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2583{
2584#ifdef RT_ARCH_AMD64
2585 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2586#elif defined(RT_ARCH_ARM64)
2587 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2588 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2589#else
2590# error "port me"
2591#endif
2592};
2593
2594
2595#if 0 /* unused */
2596/**
2597 * Tries to locate a suitable register in the given register mask.
2598 *
2599 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2600 * failed.
2601 *
2602 * @returns Host register number on success, returns UINT8_MAX on failure.
2603 */
2604static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2605{
2606 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2607 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2608 if (fRegs)
2609 {
2610 /** @todo pick better here: */
2611 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2612
2613 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2614 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2615 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2616 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2617
2618 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2619 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2620 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2621 return idxReg;
2622 }
2623 return UINT8_MAX;
2624}
2625#endif /* unused */
2626
2627
2628#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2629/**
2630 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2631 *
2632 * @returns New code buffer offset on success, UINT32_MAX on failure.
2633 * @param pReNative The native recompile state.
2634 * @param off The current code buffer position.
2635 * @param enmGstReg The guest register to store to.
2636 * @param idxHstReg The host register to store from.
2637 */
2638DECL_FORCE_INLINE_THROW(uint32_t)
2639iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2640{
2641 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2642 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2643
2644 switch (g_aGstShadowInfo[enmGstReg].cb)
2645 {
2646 case sizeof(uint64_t):
2647 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2648 case sizeof(uint32_t):
2649 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2650 case sizeof(uint16_t):
2651 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2652#if 0 /* not present in the table. */
2653 case sizeof(uint8_t):
2654 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2655#endif
2656 default:
2657 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2658 }
2659}
2660
2661
2662/**
2663 * Emits code to flush a pending write of the given guest register if any.
2664 *
2665 * @returns New code buffer offset.
2666 * @param pReNative The native recompile state.
2667 * @param off Current code buffer position.
2668 * @param enmGstReg The guest register to flush.
2669 */
2670DECL_HIDDEN_THROW(uint32_t)
2671iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2672{
2673 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2674
2675 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2676 && enmGstReg <= kIemNativeGstReg_GprLast)
2677 || enmGstReg == kIemNativeGstReg_MxCsr);
2678 Assert( idxHstReg != UINT8_MAX
2679 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2680 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2681 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2682
2683 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2684
2685 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2686 return off;
2687}
2688
2689
2690/**
2691 * Flush the given set of guest registers if marked as dirty.
2692 *
2693 * @returns New code buffer offset.
2694 * @param pReNative The native recompile state.
2695 * @param off Current code buffer position.
2696 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2697 */
2698DECL_HIDDEN_THROW(uint32_t)
2699iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2700{
2701 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2702 if (bmGstRegShadowDirty)
2703 {
2704# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2705 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2706 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2707# endif
2708 do
2709 {
2710 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2711 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2712 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2713 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2714 } while (bmGstRegShadowDirty);
2715 }
2716
2717 return off;
2718}
2719
2720
2721/**
2722 * Flush all shadowed guest registers marked as dirty for the given host register.
2723 *
2724 * @returns New code buffer offset.
2725 * @param pReNative The native recompile state.
2726 * @param off Current code buffer position.
2727 * @param idxHstReg The host register.
2728 *
2729 * @note This doesn't do any unshadowing of guest registers from the host register.
2730 */
2731DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
2732{
2733 /* We need to flush any pending guest register writes this host register shadows. */
2734 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2735 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
2736 {
2737# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2738 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2739 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
2740# endif
2741 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
2742 * likely to only have a single bit set. It'll be in the 0..15 range,
2743 * but still it's 15 unnecessary loops for the last guest register. */
2744
2745 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
2746 do
2747 {
2748 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2749 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2750 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2751 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2752 } while (bmGstRegShadowDirty);
2753 }
2754
2755 return off;
2756}
2757#endif
2758
2759
2760/**
2761 * Locate a register, possibly freeing one up.
2762 *
2763 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2764 * failed.
2765 *
2766 * @returns Host register number on success. Returns UINT8_MAX if no registers
2767 * found, the caller is supposed to deal with this and raise an
2768 * allocation type specific status code (if desired).
2769 *
2770 * @throws VBox status code if we run into trouble spilling a variable or
2771 * recording debug info. Does NOT throw anything if we're out of
2772 * registers, though.
2773 */
2774static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2775 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2776{
2777 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
2778 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2779 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2780
2781 /*
2782 * Try a freed register that's shadowing a guest register.
2783 */
2784 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2785 if (fRegs)
2786 {
2787 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
2788
2789#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
2790 /*
2791 * When we have liveness information, we use it to kick out all shadowed
2792 * guest registers that will not be needed any more in this TB. If we're
2793 * lucky, this may prevent us from ending up here again.
2794 *
2795 * Note! We must consider the previous entry here so we don't free
2796 * anything that the current threaded function requires (current
2797 * entry is produced by the next threaded function).
2798 */
2799 uint32_t const idxCurCall = pReNative->idxCurCall;
2800 if (idxCurCall > 0)
2801 {
2802 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
2803
2804# ifndef IEMLIVENESS_EXTENDED_LAYOUT
2805 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
2806 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
2807 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
2808#else
2809 /* Construct a mask of the registers not in the read or write state.
2810 Note! We could skip writes, if they aren't from us, as this is just
2811 a hack to prevent trashing registers that have just been written
2812 or will be written when we retire the current instruction. */
2813 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
2814 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
2815 & IEMLIVENESSBIT_MASK;
2816#endif
2817 /* Merge EFLAGS. */
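            /* The liveness bitmaps track EFLAGS as seven separate bits starting at the
               kIemNativeGstReg_EFlags position (Other, CF, PF, AF, ZF, SF, OF from low to
               high), whereas the shadow bitmaps only have the single EFlags bit.  So,
               AND-fold the seven bits down into that one bit: EFLAGS is only freeable
               when every part of it is. */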
2818 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
2819 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
2820 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
2821 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
2822 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
2823
2824 /* If it matches any shadowed registers. */
2825 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
2826 {
2827#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2828 /* Writeback any dirty shadow registers we are about to unshadow. */
2829 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
2830#endif
2831
2832 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
2833 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
2834 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
2835
2836 /* See if we've got any unshadowed registers we can return now. */
2837 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
2838 if (fUnshadowedRegs)
2839 {
2840 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
2841 return (fPreferVolatile
2842 ? ASMBitFirstSetU32(fUnshadowedRegs)
2843 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2844 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
2845 - 1;
2846 }
2847 }
2848 }
2849#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
2850
2851 unsigned const idxReg = (fPreferVolatile
2852 ? ASMBitFirstSetU32(fRegs)
2853 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2854 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
2855 - 1;
2856
2857 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2858 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2859 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2860 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2861
2862#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2863 /* We need to flush any pending guest register writes this host register shadows. */
2864 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
2865#endif
2866
2867 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2868 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2869 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2870 return idxReg;
2871 }
2872
2873 /*
2874 * Try to free up a variable that's in a register.
2875 *
2876 * We do two rounds here: first evacuating variables that don't need to be
2877 * saved on the stack, then in the second round moving things to the stack.
2878 */
2879 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
2880 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2881 {
2882 uint32_t fVars = pReNative->Core.bmVars;
2883 while (fVars)
2884 {
2885 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2886 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2887#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2888 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
2889 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first, otherwise the while loop never advances past this variable. */
2890#endif
2891
2892 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2893 && (RT_BIT_32(idxReg) & fRegMask)
2894 && ( iLoop == 0
2895 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2896 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2897 && !pReNative->Core.aVars[idxVar].fRegAcquired)
2898 {
2899 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2900 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2901 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2902 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2903 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2904 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2905#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2906 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2907#endif
2908
2909 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2910 {
2911 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
2912 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2913 }
2914
2915 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2916 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2917
2918 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2919 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2920 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2921 return idxReg;
2922 }
2923 fVars &= ~RT_BIT_32(idxVar);
2924 }
2925 }
2926
2927 return UINT8_MAX;
2928}
2929
2930
2931/**
2932 * Reassigns a variable to a different register specified by the caller.
2933 *
2934 * @returns The new code buffer position.
2935 * @param pReNative The native recompile state.
2936 * @param off The current code buffer position.
2937 * @param idxVar The variable index.
2938 * @param idxRegOld The old host register number.
2939 * @param idxRegNew The new host register number.
2940 * @param pszCaller The caller for logging.
2941 */
2942static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2943 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2944{
2945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2946 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
2947#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2948 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
2949#endif
2950 RT_NOREF(pszCaller);
2951
2952#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2953 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2954#endif
2955 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2956
2957 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2958#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2959 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2960#endif
2961 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
2962 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2964
2965 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2966 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2967 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2968 if (fGstRegShadows)
2969 {
2970 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
2971 | RT_BIT_32(idxRegNew);
2972 while (fGstRegShadows)
2973 {
2974 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2975 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2976
2977 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2978 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2979 }
2980 }
2981
2982 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
2983 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2984 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2985 return off;
2986}
2987
2988
2989/**
2990 * Moves a variable to a different register or spills it onto the stack.
2991 *
2992 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2993 * kinds can easily be recreated if needed later.
2994 *
2995 * @returns The new code buffer position.
2996 * @param pReNative The native recompile state.
2997 * @param off The current code buffer position.
2998 * @param idxVar The variable index.
2999 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3000 * call-volatile registers.
3001 */
3002DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3003 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3004{
3005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3006 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3007 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3008 Assert(!pVar->fRegAcquired);
3009
3010 uint8_t const idxRegOld = pVar->idxReg;
3011 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3012 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3013 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3014 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3015 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3016 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3017 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3018 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3019#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3020 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3021#endif
3022
3023
3024 /** @todo Add statistics on this.*/
3025 /** @todo Implement basic variable liveness analysis (python) so variables
3026 * can be freed immediately once no longer used. Without liveness info we
3027 * risk trashing registers and stack slots for dead variables.
3028 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3029
3030 /*
3031 * First try move it to a different register, as that's cheaper.
3032 */
3033 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3034 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3035 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3036 if (fRegs)
3037 {
3038 /* Avoid using shadow registers, if possible. */
3039 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3040 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3041 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3042 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3043 }
3044
3045 /*
3046 * Otherwise we must spill the register onto the stack.
3047 */
3048 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3049 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3050 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3051 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3052
3053 pVar->idxReg = UINT8_MAX;
3054 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3055 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3056 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3057 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3058 return off;
3059}
3060
3061
3062/**
3063 * Allocates a temporary host general purpose register.
3064 *
3065 * This may emit code to save register content onto the stack in order to free
3066 * up a register.
3067 *
3068 * @returns The host register number; throws VBox status code on failure,
3069 * so no need to check the return value.
3070 * @param pReNative The native recompile state.
3071 * @param poff Pointer to the variable with the code buffer position.
3072 * This will be updated if we need to move a variable from
3073 * register to stack in order to satisfy the request.
3074 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3075 * registers (@c true, default) or the other way around
3076 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3077 */
3078DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3079{
3080 /*
3081 * Try find a completely unused register, preferably a call-volatile one.
3082 */
3083 uint8_t idxReg;
3084 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3085 & ~pReNative->Core.bmHstRegsWithGstShadow
3086 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3087 if (fRegs)
3088 {
3089 if (fPreferVolatile)
3090 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3091 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3092 else
3093 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3094 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3095 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3096 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3097 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3098 }
3099 else
3100 {
3101 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3102 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3103 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3104 }
3105 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3106}
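
/* Usage sketch (illustrative only, not an actual call site): the typical
   scratch register round trip as an emitter function would do it. */
#if 0
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, 0 /*uImm - whatever the emitter needs*/);
    /* ... emit further code using idxRegTmp as scratch ... */
    iemNativeRegFreeTmp(pReNative, idxRegTmp); /* emits no code */
#endif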
3107
3108
3109/**
3110 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3111 * registers.
3112 *
3113 * @returns The host register number; throws VBox status code on failure,
3114 * so no need to check the return value.
3115 * @param pReNative The native recompile state.
3116 * @param poff Pointer to the variable with the code buffer position.
3117 * This will be updated if we need to move a variable from
3118 * register to stack in order to satisfy the request.
3119 * @param fRegMask Mask of acceptable registers.
3120 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3121 * registers (@c true, default) or the other way around
3122 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3123 */
3124DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3125 bool fPreferVolatile /*= true*/)
3126{
3127 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3128 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3129
3130 /*
3131 * Try find a completely unused register, preferably a call-volatile one.
3132 */
3133 uint8_t idxReg;
3134 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3135 & ~pReNative->Core.bmHstRegsWithGstShadow
3136 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3137 & fRegMask;
3138 if (fRegs)
3139 {
3140 if (fPreferVolatile)
3141 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3142 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3143 else
3144 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3145 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3146 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3147 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3148 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3149 }
3150 else
3151 {
3152 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3153 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3154 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3155 }
3156 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3157}
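
/* Usage sketch (illustrative only, not an actual call site): restricting the
   allocation to non-volatile registers so the value survives a helper call. */
#if 0
    uint8_t const idxRegSaved = iemNativeRegAllocTmpEx(pReNative, &off,
                                                       IEMNATIVE_HST_GREG_MASK
                                                       & ~IEMNATIVE_REG_FIXED_MASK
                                                       & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
                                                       false /*fPreferVolatile*/);
    /* ... emit the helper call and code using idxRegSaved afterwards ... */
    iemNativeRegFreeTmp(pReNative, idxRegSaved);
#endif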
3158
3159
3160/**
3161 * Allocates a temporary register for loading an immediate value into.
3162 *
3163 * This will emit code to load the immediate, unless there happens to be an
3164 * unused register with the value already loaded.
3165 *
3166 * The caller must not modify the returned register; it must be considered
3167 * read-only. Free using iemNativeRegFreeTmpImm.
3168 *
3169 * @returns The host register number; throws VBox status code on failure, so no
3170 * need to check the return value.
3171 * @param pReNative The native recompile state.
3172 * @param poff Pointer to the variable with the code buffer position.
3173 * @param uImm The immediate value that the register must hold upon
3174 * return.
3175 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3176 * registers (@c true, default) or the other way around
3177 * (@c false).
3178 *
3179 * @note Reusing immediate values has not been implemented yet.
3180 */
3181DECL_HIDDEN_THROW(uint8_t)
3182iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3183{
3184 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3185 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3186 return idxReg;
3187}
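
/* Usage sketch (illustrative only, not an actual call site): pairing the
   immediate allocator with its dedicated free routine; the register must be
   treated as read-only in between. */
#if 0
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff), true /*fPreferVolatile*/);
    /* ... emit a compare or similar that only reads idxRegImm ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm); /* assumes the value was left unmodified */
#endif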
3188
3189
3190/**
3191 * Allocates a temporary host general purpose register for keeping a guest
3192 * register value.
3193 *
3194 * Since we may already have a register holding the guest register value,
3195 * code will be emitted to do the loading if that's not the case. Code may also
3196 * be emitted if we have to free up a register to satisfy the request.
3197 *
3198 * @returns The host register number; throws VBox status code on failure, so no
3199 * need to check the return value.
3200 * @param pReNative The native recompile state.
3201 * @param poff Pointer to the variable with the code buffer
3202 * position. This will be updated if we need to move a
3203 * variable from register to stack in order to satisfy
3204 * the request.
3205 * @param enmGstReg The guest register that is to be updated.
3206 * @param enmIntendedUse How the caller will be using the host register.
3207 * @param fNoVolatileRegs Set if no volatile registers are allowed, clear if any
3208 * register is okay (default). The ASSUMPTION here is
3209 * that the caller has already flushed all volatile
3210 * registers, so this is only applied if we allocate a
3211 * new register.
3212 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3213 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3214 */
3215DECL_HIDDEN_THROW(uint8_t)
3216iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3217 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3218 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3219{
3220 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3221#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3222 AssertMsg( fSkipLivenessAssert
3223 || pReNative->idxCurCall == 0
3224 || enmGstReg == kIemNativeGstReg_Pc
3225 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3226 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3227 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3228 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3229 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3230 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3231#endif
3232 RT_NOREF(fSkipLivenessAssert);
3233#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3234 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3235#endif
3236 uint32_t const fRegMask = !fNoVolatileRegs
3237 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3238 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3239
3240 /*
3241 * First check if the guest register value is already in a host register.
3242 */
3243 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3244 {
3245 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3246 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3247 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3248 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3249
3250 /* It's not supposed to be allocated... */
3251 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3252 {
3253 /*
3254 * If the register will trash the guest shadow copy, try find a
3255 * completely unused register we can use instead. If that fails,
3256 * we need to disassociate the host reg from the guest reg.
3257 */
3258 /** @todo would be nice to know if preserving the register is in any way helpful. */
3259 /* If the purpose is calculations, try duplicate the register value as
3260 we'll be clobbering the shadow. */
3261 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3262 && ( ~pReNative->Core.bmHstRegs
3263 & ~pReNative->Core.bmHstRegsWithGstShadow
3264 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3265 {
3266 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3267
3268 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3269
3270 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3271 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3272 g_apszIemNativeHstRegNames[idxRegNew]));
3273 idxReg = idxRegNew;
3274 }
3275 /* If the current register matches the restrictions, go ahead and allocate
3276 it for the caller. */
3277 else if (fRegMask & RT_BIT_32(idxReg))
3278 {
3279 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3280 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3281 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3282 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3283 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3284 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3285 else
3286 {
3287 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3288 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3289 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3290 }
3291 }
3292 /* Otherwise, allocate a register that satisfies the caller and transfer
3293 the shadowing if compatible with the intended use. (This basically
3294 means the call wants a non-volatile register (RSP push/pop scenario).) */
3295 else
3296 {
3297 Assert(fNoVolatileRegs);
3298 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3299 !fNoVolatileRegs
3300 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3301 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3302 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3303 {
3304 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3305 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3306 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3307 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3308 }
3309 else
3310 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3311 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3312 g_apszIemNativeHstRegNames[idxRegNew]));
3313 idxReg = idxRegNew;
3314 }
3315 }
3316 else
3317 {
3318 /*
3319 * Oops. Shadowed guest register already allocated!
3320 *
3321 * Allocate a new register, copy the value and, if updating, the
3322 * guest shadow copy assignment to the new register.
3323 */
3324 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3325 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3326 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3327 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3328
3329 /** @todo share register for readonly access. */
3330 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3331 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3332
3333 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3334 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3335
3336 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3337 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3338 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3339 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3340 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3341 else
3342 {
3343 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3344 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3345 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3346 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3347 }
3348 idxReg = idxRegNew;
3349 }
3350 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3351
3352#ifdef VBOX_STRICT
3353 /* Strict builds: Check that the value is correct. */
3354 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3355#endif
3356
3357#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3358 /** @todo r=aeichner Implement for registers other than GPR as well. */
3359 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3360 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3361 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3362 && enmGstReg <= kIemNativeGstReg_GprLast)
3363 || enmGstReg == kIemNativeGstReg_MxCsr))
3364 {
3365# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3366 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3367 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3368# endif
3369 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3370 }
3371#endif
3372
3373 return idxReg;
3374 }
3375
3376 /*
3377 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3378 */
3379 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3380
3381 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3382 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3383
3384 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3385 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3386 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3387 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3388
3389#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3390 /** @todo r=aeichner Implement for registers other than GPR as well. */
3391 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3392 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3393 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3394 && enmGstReg <= kIemNativeGstReg_GprLast)
3395 || enmGstReg == kIemNativeGstReg_MxCsr))
3396 {
3397# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3398 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3399 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3400# endif
3401 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3402 }
3403#endif
3404
3405 return idxRegNew;
3406}
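
/* Usage sketch (illustrative only, not an actual call site): fetching a guest
   register for read-only use; an existing shadow copy is reused when present,
   otherwise a load from the guest context is emitted. */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ReadOnly);
    /* ... emit code that reads idxPcReg ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg); /* the guest shadow association stays intact */
#endif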
3407
3408
3409/**
3410 * Allocates a temporary host general purpose register that already holds the
3411 * given guest register value.
3412 *
3413 * The use case for this function is places where the shadowing state cannot be
3414 * modified due to branching and such. This will fail if we don't have a
3415 * current shadow copy handy or if it's incompatible. The only code that will
3416 * be emitted here is value checking code in strict builds.
3417 *
3418 * The intended use can only be readonly!
3419 *
3420 * @returns The host register number, UINT8_MAX if not present.
3421 * @param pReNative The native recompile state.
3422 * @param poff Pointer to the instruction buffer offset.
3423 * Will be updated in strict builds if a register is
3424 * found.
3425 * @param enmGstReg The guest register that is to be used (read-only).
3426 * @note In strict builds, this may throw instruction buffer growth failures.
3427 * Non-strict builds will not throw anything.
3428 * @sa iemNativeRegAllocTmpForGuestReg
3429 */
3430DECL_HIDDEN_THROW(uint8_t)
3431iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3432{
3433 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3434#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3435 AssertMsg( pReNative->idxCurCall == 0
3436 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3437 || enmGstReg == kIemNativeGstReg_Pc,
3438 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3439#endif
3440
3441 /*
3442 * First check if the guest register value is already in a host register.
3443 */
3444 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3445 {
3446 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3447 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3448 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3449 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3450
3451 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3452 {
3453 /*
3454 * We only do readonly use here, so easy compared to the other
3455 * variant of this code.
3456 */
3457 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3458 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3459 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3460 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3461 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3462
3463#ifdef VBOX_STRICT
3464 /* Strict builds: Check that the value is correct. */
3465 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3466#else
3467 RT_NOREF(poff);
3468#endif
3469 return idxReg;
3470 }
3471 }
3472
3473 return UINT8_MAX;
3474}
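
/* Usage sketch (illustrative only, not an actual call site): borrowing an
   existing shadow copy without disturbing the shadowing state. */
#if 0
    uint8_t const idxRegSrc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxRegSrc != UINT8_MAX)
    {
        /* ... read-only use of idxRegSrc ... */
        iemNativeRegFreeTmp(pReNative, idxRegSrc);
    }
    /* else: no shadow copy currently loaded - the caller must take a fallback path. */
#endif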
3475
3476
3477/**
3478 * Allocates argument registers for a function call.
3479 *
3480 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3481 * need to check the return value.
3482 * @param pReNative The native recompile state.
3483 * @param off The current code buffer offset.
3484 * @param cArgs The number of arguments the function call takes.
3485 */
3486DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3487{
3488 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3489 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3490 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3491 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3492
3493 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3494 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3495 else if (cArgs == 0)
3496 return off;
3497
3498 /*
3499 * Do we get lucky and all registers are free and not shadowing anything?
3500 */
3501 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3502 for (uint32_t i = 0; i < cArgs; i++)
3503 {
3504 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3505 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3506 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3507 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3508 }
3509 /*
3510 * Okay, not lucky so we have to free up the registers.
3511 */
3512 else
3513 for (uint32_t i = 0; i < cArgs; i++)
3514 {
3515 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3516 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3517 {
3518 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3519 {
3520 case kIemNativeWhat_Var:
3521 {
3522 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3523 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3524 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3525 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3526 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3527#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3528 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3529#endif
3530
3531 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3532 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3533 else
3534 {
3535 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3536 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3537 }
3538 break;
3539 }
3540
3541 case kIemNativeWhat_Tmp:
3542 case kIemNativeWhat_Arg:
3543 case kIemNativeWhat_rc:
3544 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3545 default:
3546 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3547 }
3548
3549 }
3550 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3551 {
3552 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3553 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3554 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3555#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3556 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3557#endif
3558 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3559 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3560 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3561 }
3562 else
3563 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3564 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3565 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3566 }
3567 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3568 return off;
3569}
3570
3571
3572DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3573
3574
3575#if 0
3576/**
3577 * Frees a register assignment of any type.
3578 *
3579 * @param pReNative The native recompile state.
3580 * @param idxHstReg The register to free.
3581 *
3582 * @note Does not update variables.
3583 */
3584DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3585{
3586 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3587 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3588 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3589 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3590 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3591 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3592 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3593 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3594 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3595 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3596 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3597 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3598 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3599 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3600
3601 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3602 /* no flushing, right:
3603 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3604 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3605 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3606 */
3607}
3608#endif
3609
3610
3611/**
3612 * Frees a temporary register.
3613 *
3614 * Any shadow copies of guest registers assigned to the host register will not
3615 * be flushed by this operation.
3616 */
3617DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3618{
3619 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3620 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3621 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3622 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3623 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3624}
3625
3626
3627/**
3628 * Frees a temporary immediate register.
3629 *
3630 * It is assumed that the caller has not modified the register, so it still holds
3631 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3632 */
3633DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3634{
3635 iemNativeRegFreeTmp(pReNative, idxHstReg);
3636}
3637
3638
3639/**
3640 * Frees a register assigned to a variable.
3641 *
3642 * The register will be disassociated from the variable.
3643 */
3644DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3645{
3646 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3647 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3648 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3650 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3651#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3652 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3653#endif
3654
3655 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3656 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3657 if (!fFlushShadows)
3658 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3659 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3660 else
3661 {
3662 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3663 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3664#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3665 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3666#endif
3667 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3668 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3669 uint64_t fGstRegShadows = fGstRegShadowsOld;
3670 while (fGstRegShadows)
3671 {
3672 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3673 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3674
3675 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3676 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3677 }
3678 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3679 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3680 }
3681}
3682
3683
3684#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3685# ifdef LOG_ENABLED
3686/** Host CPU SIMD register names. */
3687DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3688{
3689# ifdef RT_ARCH_AMD64
3690 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3691# elif defined(RT_ARCH_ARM64)
3692 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3693 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3694# else
3695# error "port me"
3696# endif
3697};
3698# endif
3699
3700
3701/**
3702 * Frees a SIMD register assigned to a variable.
3703 *
3704 * The register will be disassociated from the variable.
3705 */
3706DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3707{
3708 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3709 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3710 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3711 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3712 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3713 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3714
3715 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3716 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3717 if (!fFlushShadows)
3718 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3719 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3720 else
3721 {
3722 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3723 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3724 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
3725 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
3726 uint64_t fGstRegShadows = fGstRegShadowsOld;
3727 while (fGstRegShadows)
3728 {
3729 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3730 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3731
3732 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
3733 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
3734 }
3735 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3736 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3737 }
3738}
3739
3740
3741/**
3742 * Reassigns a variable to a different SIMD register specified by the caller.
3743 *
3744 * @returns The new code buffer position.
3745 * @param pReNative The native recompile state.
3746 * @param off The current code buffer position.
3747 * @param idxVar The variable index.
3748 * @param idxRegOld The old host register number.
3749 * @param idxRegNew The new host register number.
3750 * @param pszCaller The caller for logging.
3751 */
3752static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3753 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3754{
3755 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3756 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3757 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3758 RT_NOREF(pszCaller);
3759
3760 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3761 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
3762 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
3763
3764 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3765 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3766 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3767
3768 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3769 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
3771
3772 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
3773 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
3774 else
3775 {
3776 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
3777 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
3778 }
3779
3780 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3781 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3782 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
3783 if (fGstRegShadows)
3784 {
3785 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3786 | RT_BIT_32(idxRegNew);
3787 while (fGstRegShadows)
3788 {
3789 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3790 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3791
3792 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
3793 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
3794 }
3795 }
3796
3797 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3798 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3799 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
3800 return off;
3801}
3802
3803
3804/**
3805 * Moves a variable to a different register or spills it onto the stack.
3806 *
3807 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3808 * kinds can easily be recreated if needed later.
3809 *
3810 * @returns The new code buffer position.
3811 * @param pReNative The native recompile state.
3812 * @param off The current code buffer position.
3813 * @param idxVar The variable index.
3814 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3815 * call-volatile registers.
3816 */
3817DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3818 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
3819{
3820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3821 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3822 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3823 Assert(!pVar->fRegAcquired);
3824 Assert(pVar->fSimdReg);
3825
3826 uint8_t const idxRegOld = pVar->idxReg;
3827 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
3828 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
3829 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3830 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
3831 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
3832 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3833 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
3834 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3835 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3836 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3837
3838 /** @todo Add statistics on this.*/
3839 /** @todo Implement basic variable liveness analysis (python) so variables
3840 * can be freed immediately once no longer used. Without liveness info we
3841 * risk trashing registers and stack slots for dead variables.
3842 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3843
3844 /*
3845 * First try move it to a different register, as that's cheaper.
3846 */
3847 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3848 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
3849 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
3850 if (fRegs)
3851 {
3852 /* Avoid using shadow registers, if possible. */
3853 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
3854 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
3855 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3856 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
3857 }
3858
3859 /*
3860 * Otherwise we must spill the register onto the stack.
3861 */
3862 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3863 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3864 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3865
3866 if (pVar->cbVar == sizeof(RTUINT128U))
3867 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3868 else
3869 {
3870 Assert(pVar->cbVar == sizeof(RTUINT256U));
3871 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3872 }
3873
3874 pVar->idxReg = UINT8_MAX;
3875 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3876 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
3877 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3878 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3879 return off;
3880}
3881
3882
3883/**
3884 * Called right before emitting a call instruction to move anything important
3885 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
3886 * optionally freeing argument variables.
3887 *
3888 * @returns New code buffer offset, UINT32_MAX on failure.
3889 * @param pReNative The native recompile state.
3890 * @param off The code buffer offset.
3891 * @param cArgs The number of arguments the function call takes.
3892 * It is presumed that the host register part of these has
3893 * been allocated as such already and won't need moving,
3894 * just freeing.
3895 * @param fKeepVars Mask of variables that should keep their register
3896 * assignments. Caller must take care to handle these.
3897 */
3898DECL_HIDDEN_THROW(uint32_t)
3899iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
3900{
3901 Assert(!cArgs); RT_NOREF(cArgs);
3902
3903 /* fKeepVars will reduce this mask. */
3904 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
3905
3906 /*
3907 * Move anything important out of volatile registers.
3908 */
3909 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
3910#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3911 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
3912#endif
3913 ;
3914
3915 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
3916 if (!fSimdRegsToMove)
3917 { /* likely */ }
3918 else
3919 {
3920 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
3921 while (fSimdRegsToMove != 0)
3922 {
3923 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
3924 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
3925
3926 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
3927 {
3928 case kIemNativeWhat_Var:
3929 {
3930 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
3931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3932 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3933 Assert(pVar->idxReg == idxSimdReg);
3934 Assert(pVar->fSimdReg);
3935 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
3936 {
3937 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
3938 idxVar, pVar->enmKind, pVar->idxReg));
3939 if (pVar->enmKind != kIemNativeVarKind_Stack)
3940 pVar->idxReg = UINT8_MAX;
3941 else
3942 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
3943 }
3944 else
3945 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
3946 continue;
3947 }
3948
3949 case kIemNativeWhat_Arg:
3950 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
3951 continue;
3952
3953 case kIemNativeWhat_rc:
3954 case kIemNativeWhat_Tmp:
3955 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
3956 continue;
3957
3958 case kIemNativeWhat_FixedReserved:
3959#ifdef RT_ARCH_ARM64
3960 continue; /* On ARM the upper half of the virtual 256-bit register. */
3961#endif
3962
3963 case kIemNativeWhat_FixedTmp:
3964 case kIemNativeWhat_pVCpuFixed:
3965 case kIemNativeWhat_pCtxFixed:
3966 case kIemNativeWhat_PcShadow:
3967 case kIemNativeWhat_Invalid:
3968 case kIemNativeWhat_End:
3969 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3970 }
3971 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3972 }
3973 }
3974
3975 /*
3976 * Do the actual freeing.
3977 */
3978 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
3979 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
3980 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
3981 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
3982
3983 /* If there are guest register shadows in any call-volatile register, we
3984 have to clear the corresponding guest register masks for each register. */
3985 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
3986 if (fHstSimdRegsWithGstShadow)
3987 {
3988 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3989 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
3990 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
3991 do
3992 {
3993 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
3994 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
3995
3996 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
3997
3998#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3999 /*
4000 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4001 * to call volatile registers).
4002 */
4003 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4004 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4005 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4006#endif
4007 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4008 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4009
4010 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4011 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4012 } while (fHstSimdRegsWithGstShadow != 0);
4013 }
4014
4015 return off;
4016}
4017#endif
4018
4019
4020/**
4021 * Called right before emitting a call instruction to move anything important
4022 * out of call-volatile registers, free and flush the call-volatile registers,
4023 * optionally freeing argument variables.
4024 *
4025 * @returns New code buffer offset, UINT32_MAX on failure.
4026 * @param pReNative The native recompile state.
4027 * @param off The code buffer offset.
4028 * @param cArgs The number of arguments the function call takes.
4029 * It is presumed that the host register part of these has
4030 * been allocated as such already and won't need moving,
4031 * just freeing.
4032 * @param fKeepVars Mask of variables that should keep their register
4033 * assignments. Caller must take care to handle these.
4034 */
4035DECL_HIDDEN_THROW(uint32_t)
4036iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4037{
4038 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4039
4040 /* fKeepVars will reduce this mask. */
4041 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4042
4043 /*
4044 * Move anything important out of volatile registers.
4045 */
4046 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4047 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4048 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4049#ifdef IEMNATIVE_REG_FIXED_TMP0
4050 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4051#endif
4052#ifdef IEMNATIVE_REG_FIXED_TMP1
4053 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4054#endif
4055#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4056 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4057#endif
4058 & ~g_afIemNativeCallRegs[cArgs];
4059
4060 fRegsToMove &= pReNative->Core.bmHstRegs;
4061 if (!fRegsToMove)
4062 { /* likely */ }
4063 else
4064 {
4065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4066 while (fRegsToMove != 0)
4067 {
4068 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4069 fRegsToMove &= ~RT_BIT_32(idxReg);
4070
4071 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4072 {
4073 case kIemNativeWhat_Var:
4074 {
4075 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4076 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4077 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4078 Assert(pVar->idxReg == idxReg);
4079#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4080 Assert(!pVar->fSimdReg);
4081#endif
4082 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4083 {
4084 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4085 idxVar, pVar->enmKind, pVar->idxReg));
4086 if (pVar->enmKind != kIemNativeVarKind_Stack)
4087 pVar->idxReg = UINT8_MAX;
4088 else
4089 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4090 }
4091 else
4092 fRegsToFree &= ~RT_BIT_32(idxReg);
4093 continue;
4094 }
4095
4096 case kIemNativeWhat_Arg:
4097 AssertMsgFailed(("What?!?: %u\n", idxReg));
4098 continue;
4099
4100 case kIemNativeWhat_rc:
4101 case kIemNativeWhat_Tmp:
4102 AssertMsgFailed(("Missing free: %u\n", idxReg));
4103 continue;
4104
4105 case kIemNativeWhat_FixedTmp:
4106 case kIemNativeWhat_pVCpuFixed:
4107 case kIemNativeWhat_pCtxFixed:
4108 case kIemNativeWhat_PcShadow:
4109 case kIemNativeWhat_FixedReserved:
4110 case kIemNativeWhat_Invalid:
4111 case kIemNativeWhat_End:
4112 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4113 }
4114 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4115 }
4116 }
4117
4118 /*
4119 * Do the actual freeing.
4120 */
4121 if (pReNative->Core.bmHstRegs & fRegsToFree)
4122 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4123 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4124 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4125
4126 /* If there are guest register shadows in any call-volatile register, we
4127 have to clear the corresponding guest register masks for each register. */
4128 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4129 if (fHstRegsWithGstShadow)
4130 {
4131 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4132 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4133 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4134 do
4135 {
4136 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4137 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4138
4139 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4140
4141#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4142 /*
4143 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4144 * to call volatile registers).
4145 */
4146 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4147 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4148 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4149#endif
4150
4151 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4152 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4153 } while (fHstRegsWithGstShadow != 0);
4154 }
4155
4156#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4157 /* Now for the SIMD registers, no argument support for now. */
4158 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4159#endif
4160
4161 return off;
4162}
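
/* Usage sketch (illustrative only, not an actual call site): rough shape of a
   helper call site; real call sites also load the argument registers and emit
   the call itself between the two steps. */
#if 0
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4 /*cArgs*/, 0 /*fKeepVars*/);
    /* ... load the argument registers and emit the call here ... */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif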
4163
4164
4165/**
4166 * Flushes a set of guest register shadow copies.
4167 *
4168 * This is usually done after calling a threaded function or a C-implementation
4169 * of an instruction.
4170 *
4171 * @param pReNative The native recompile state.
4172 * @param fGstRegs Set of guest registers to flush.
4173 */
4174DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4175{
4176 /*
4177 * Reduce the mask by what's currently shadowed
4178 */
4179 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4180 fGstRegs &= bmGstRegShadowsOld;
4181 if (fGstRegs)
4182 {
4183 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4184 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4185 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4186 if (bmGstRegShadowsNew)
4187 {
4188 /*
4189 * Partial.
4190 */
4191 do
4192 {
4193 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4194 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4195 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4196 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4197 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4199 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4200#endif
4201
4202 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4203 fGstRegs &= ~fInThisHstReg;
4204 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4205 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4206 if (!fGstRegShadowsNew)
4207 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4208 } while (fGstRegs != 0);
4209 }
4210 else
4211 {
4212 /*
4213 * Clear all.
4214 */
4215 do
4216 {
4217 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4218 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4219 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4220 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4221 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4222#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4223 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4224#endif
4225
4226 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4227 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4228 } while (fGstRegs != 0);
4229 pReNative->Core.bmHstRegsWithGstShadow = 0;
4230 }
4231 }
4232}
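
/* Usage sketch (illustrative only, not an actual call site): dropping the PC
   shadow after emitting a call to code that may change RIP behind our back. */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
#endif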
4233
4234
4235/**
4236 * Flushes guest register shadow copies held by a set of host registers.
4237 *
4238 * This is used with the TLB lookup code for ensuring that we don't carry on
4239 * with any guest shadows in volatile registers, as these will get corrupted by
4240 * a TLB miss.
4241 *
4242 * @param pReNative The native recompile state.
4243 * @param fHstRegs Set of host registers to flush guest shadows for.
4244 */
4245DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4246{
4247 /*
4248 * Reduce the mask by what's currently shadowed.
4249 */
4250 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4251 fHstRegs &= bmHstRegsWithGstShadowOld;
4252 if (fHstRegs)
4253 {
4254 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4255 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4256 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4257 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4258 if (bmHstRegsWithGstShadowNew)
4259 {
4260 /*
4261 * Partial (likely).
4262 */
4263 uint64_t fGstShadows = 0;
4264 do
4265 {
4266 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4267 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4268 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4269 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4270#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4271 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4272#endif
4273
4274 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4275 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4276 fHstRegs &= ~RT_BIT_32(idxHstReg);
4277 } while (fHstRegs != 0);
4278 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4279 }
4280 else
4281 {
4282 /*
4283 * Clear all.
4284 */
4285 do
4286 {
4287 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4288 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4289 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4290 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4291#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4292 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4293#endif
4294
4295 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4296 fHstRegs &= ~RT_BIT_32(idxHstReg);
4297 } while (fHstRegs != 0);
4298 pReNative->Core.bmGstRegShadows = 0;
4299 }
4300 }
4301}
4302
4303
4304/**
4305 * Restores guest shadow copies in volatile registers.
4306 *
4307 * This is used after calling a helper function (think TLB miss) to restore the
4308 * register state of volatile registers.
4309 *
4310 * @param pReNative The native recompile state.
4311 * @param off The code buffer offset.
4312 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4313 * be active (allocated) w/o asserting. Hack.
4314 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4315 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4316 */
4317DECL_HIDDEN_THROW(uint32_t)
4318iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4319{
4320 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4321 if (fHstRegs)
4322 {
4323 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4324 do
4325 {
4326 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4327
4328 /* It's not fatal if a register is active holding a variable that is
4329 shadowing a guest register, ASSUMING all pending guest register
4330 writes were flushed prior to the helper call. However, we'll be
4331 emitting duplicate restores, so it wastes code space. */
4332 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4333 RT_NOREF(fHstRegsActiveShadows);
4334
4335 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4336#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4337 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4338#endif
4339 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4340 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4342
4343 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4344 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4345
4346 fHstRegs &= ~RT_BIT_32(idxHstReg);
4347 } while (fHstRegs != 0);
4348 }
4349 return off;
4350}
4351
4352
4353
4354
4355/*********************************************************************************************************************************
4356* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4357*********************************************************************************************************************************/
4358#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4359
4360/**
4361 * Info about shadowed guest SIMD register values.
4362 * @see IEMNATIVEGSTSIMDREG
4363 */
4364static struct
4365{
4366 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4367 uint32_t offXmm;
4368 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4369 uint32_t offYmm;
4370 /** Name (for logging). */
4371 const char *pszName;
4372} const g_aGstSimdShadowInfo[] =
4373{
4374#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4375 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4376 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4377 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4378 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4379 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4380 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4381 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4382 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4383 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4384 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4385 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4386 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4387 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4388 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4389 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4390 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4391 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4392#undef CPUMCTX_OFF_AND_SIZE
4393};
4394AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4395
4396
4397/**
4398 * Frees a temporary SIMD register.
4399 *
4400 * Any shadow copies of guest registers assigned to the host register will not
4401 * be flushed by this operation.
4402 */
4403DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4404{
4405 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4406 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4407 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4408 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4409 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4410}
4411
4412
4413/**
4414 * Emits code to flush a pending write of the given guest SIMD register, if any, and clears its dirty state.
4415 *
4416 * @returns New code buffer offset.
4417 * @param pReNative The native recompile state.
4418 * @param off Current code buffer position.
4419 * @param enmGstSimdReg The guest SIMD register to flush.
4420 */
4421DECL_HIDDEN_THROW(uint32_t)
4422iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4423{
4424 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4425
4426 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4427 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4428 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4429 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4430
4431 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4432 {
4433 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4434 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4435 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4436 }
4437
4438 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4439 {
4440 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4441 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4442 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4443 }
4444
4445 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4446 return off;
4447}
4448
4449
4450/**
4451 * Flush the given set of guest SIMD registers if marked as dirty.
4452 *
4453 * @returns New code buffer offset.
4454 * @param pReNative The native recompile state.
4455 * @param off Current code buffer position.
4456 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4457 */
4458DECL_HIDDEN_THROW(uint32_t)
4459iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4460{
4461 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4462 & fFlushGstSimdReg;
4463 if (bmGstSimdRegShadowDirty)
4464 {
4465# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4466 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4467 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4468# endif
4469
4470 do
4471 {
4472 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4473 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4474 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4475 } while (bmGstSimdRegShadowDirty);
4476 }
4477
4478 return off;
4479}
4480
4481
4482#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4483/**
4484 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4485 *
4486 * @returns New code buffer offset.
4487 * @param pReNative The native recompile state.
4488 * @param off Current code buffer position.
4489 * @param idxHstSimdReg The host SIMD register.
4490 *
4491 * @note This doesn't do any unshadowing of guest registers from the host register.
4492 */
4493DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4494{
4495 /* We need to flush any pending guest register writes this host register shadows. */
4496 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4497 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4498 if (bmGstSimdRegShadowDirty)
4499 {
4500# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4501 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4502 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4503# endif
4504
4505 do
4506 {
4507 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4508 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4509 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4510 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4511 } while (bmGstSimdRegShadowDirty);
4512 }
4513
4514 return off;
4515}
4516#endif
4517
4518
4519/**
4520 * Locate a register, possibly freeing one up.
4521 *
4522 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4523 * failed.
4524 *
4525 * @returns Host register number on success. Returns UINT8_MAX if no registers
4526 * are found; the caller is supposed to deal with this and raise an
4527 * allocation type specific status code (if desired).
4528 *
4529 * @throws VBox status code if we run into trouble spilling a variable or
4530 * recording debug info. Does NOT throw anything if we're out of
4531 * registers, though.
4532 */
4533static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4534 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4535{
4536 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4537 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4538 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4539
4540 /*
4541 * Try a freed register that's shadowing a guest register.
4542 */
4543 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4544 if (fRegs)
4545 {
4546 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4547
4548#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4549 /*
4550 * When we have liveness information, we use it to kick out all shadowed
4551 * guest registers that will not be needed any more in this TB. If we're
4552 * lucky, this may prevent us from ending up here again.
4553 *
4554 * Note! We must consider the previous entry here so we don't free
4555 * anything that the current threaded function requires (current
4556 * entry is produced by the next threaded function).
4557 */
4558 uint32_t const idxCurCall = pReNative->idxCurCall;
4559 if (idxCurCall > 0)
4560 {
4561 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4562
4563# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4564 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4565 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4566 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4567#else
4568 /* Construct a mask of the registers not in the read or write state.
4569 Note! We could skip writes, if they aren't from us, as this is just
4570 a hack to prevent trashing registers that have just been written
4571 or will be written when we retire the current instruction. */
4572 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4573 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4574 & IEMLIVENESSBIT_MASK;
4575#endif
4576 /* If it matches any shadowed registers. */
4577 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4578 {
4579 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4580 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4581 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4582
4583 /* See if we've got any unshadowed registers we can return now. */
4584 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4585 if (fUnshadowedRegs)
4586 {
4587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4588 return (fPreferVolatile
4589 ? ASMBitFirstSetU32(fUnshadowedRegs)
4590 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4591 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4592 - 1;
4593 }
4594 }
4595 }
4596#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4597
4598 unsigned const idxReg = (fPreferVolatile
4599 ? ASMBitFirstSetU32(fRegs)
4600 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4601 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4602 - 1;
4603
4604 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4605 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4606 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4607 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4608
4609 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4610 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4611
4612 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4613 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4614 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4615 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4616 return idxReg;
4617 }
4618
4619 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4620
4621 /*
4622 * Try free up a variable that's in a register.
4623 *
4624 * We do two rounds here, first evacuating variables we don't need to be
4625 * saved on the stack, then in the second round move things to the stack.
4626 */
4627 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4628 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4629 {
4630 uint32_t fVars = pReNative->Core.bmVars;
4631 while (fVars)
4632 {
4633 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4634 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4635 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here. */
4636 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Note: must clear the bit first, otherwise we'd loop forever on this variable. */
4637
4638 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4639 && (RT_BIT_32(idxReg) & fRegMask)
4640 && ( iLoop == 0
4641 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4642 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4643 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4644 {
4645 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4646 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4647 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4648 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4649 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4650 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4651
4652 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4653 {
4654 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4655 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4656 }
4657
4658 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4659 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4660
4661 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4662 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4663 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4664 return idxReg;
4665 }
4666 fVars &= ~RT_BIT_32(idxVar);
4667 }
4668 }
4669
4670 AssertFailed();
4671 return UINT8_MAX;
4672}
4673
4674
4675/**
4676 * Flushes a set of guest SIMD register shadow copies.
4677 *
4678 * This is usually done after calling a threaded function or a C-implementation
4679 * of an instruction.
4680 *
4681 * @param pReNative The native recompile state.
4682 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4683 */
4684DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4685{
4686 /*
4687 * Reduce the mask by what's currently shadowed
4688 */
4689 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4690 fGstSimdRegs &= bmGstSimdRegShadows;
4691 if (fGstSimdRegs)
4692 {
4693 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4694 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4695 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4696 if (bmGstSimdRegShadowsNew)
4697 {
4698 /*
4699 * Partial.
4700 */
4701 do
4702 {
4703 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4704 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4705 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4706 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4707 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4708 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4709
4710 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4711 fGstSimdRegs &= ~fInThisHstReg;
4712 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4713 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4714 if (!fGstRegShadowsNew)
4715 {
4716 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4717 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4718 }
4719 } while (fGstSimdRegs != 0);
4720 }
4721 else
4722 {
4723 /*
4724 * Clear all.
4725 */
4726 do
4727 {
4728 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4729 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4730 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4731 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4732 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4733 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4734
4735 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4736 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4737 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4738 } while (fGstSimdRegs != 0);
4739 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
4740 }
4741 }
4742}
4743
4744
4745/**
4746 * Allocates a temporary host SIMD register.
4747 *
4748 * This may emit code to save register content onto the stack in order to free
4749 * up a register.
4750 *
4751 * @returns The host register number; throws VBox status code on failure,
4752 * so no need to check the return value.
4753 * @param pReNative The native recompile state.
4754 * @param poff Pointer to the variable with the code buffer position.
4755 * This will be updated if we need to move a variable from
4756 * register to stack in order to satisfy the request.
4757 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4758 * registers (@c true, default) or the other way around
4759 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4760 */
4761DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4762{
4763 /*
4764 * Try find a completely unused register, preferably a call-volatile one.
4765 */
4766 uint8_t idxSimdReg;
4767 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4768 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4769 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
4770 if (fRegs)
4771 {
4772 if (fPreferVolatile)
4773 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4774 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4775 else
4776 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4777 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4778 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4779 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4780
4781 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4782 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4783 }
4784 else
4785 {
4786 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
4787 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4788 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4789 }
4790
4791 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4792 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4793}
4794
4795
4796/**
4797 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
4798 * registers.
4799 *
4800 * @returns The host register number; throws VBox status code on failure,
4801 * so no need to check the return value.
4802 * @param pReNative The native recompile state.
4803 * @param poff Pointer to the variable with the code buffer position.
4804 * This will be updated if we need to move a variable from
4805 * register to stack in order to satisfy the request.
4806 * @param fRegMask Mask of acceptable registers.
4807 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4808 * registers (@c true, default) or the other way around
4809 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4810 */
4811DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4812 bool fPreferVolatile /*= true*/)
4813{
4814 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4815 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4816
4817 /*
4818 * Try find a completely unused register, preferably a call-volatile one.
4819 */
4820 uint8_t idxSimdReg;
4821 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4822 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4823 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
4824 & fRegMask;
4825 if (fRegs)
4826 {
4827 if (fPreferVolatile)
4828 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4829 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4830 else
4831 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4832 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4833 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4834 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4835
4836 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4837 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4838 }
4839 else
4840 {
4841 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4842 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4843 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4844 }
4845
4846 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4847 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4848}
4849
4850
4851/**
4852 * Sets the indicator for which part of the given SIMD register has valid data loaded.
4853 *
4854 * @param pReNative The native recompile state.
4855 * @param idxHstSimdReg The host SIMD register to update the state for.
4856 * @param enmLoadSz The load size to set.
4857 */
4858DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
4859 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
4860{
4861 /* Everything valid already? -> nothing to do. */
4862 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4863 return;
4864
4865 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
4866 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
4867 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
4868 {
4869 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
4870 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
4871 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
4872 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
4873 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
4874 }
4875}
4876
4877
4878static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
4879 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
4880{
4881 /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
4882 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
4883 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4884 {
4885# ifdef RT_ARCH_ARM64
4886 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
4887 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
4888# endif
4889
4890 if (idxHstSimdRegDst != idxHstSimdRegSrc)
4891 {
4892 switch (enmLoadSzDst)
4893 {
4894 case kIemNativeGstSimdRegLdStSz_256:
4895 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4896 break;
4897 case kIemNativeGstSimdRegLdStSz_Low128:
4898 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4899 break;
4900 case kIemNativeGstSimdRegLdStSz_High128:
4901 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4902 break;
4903 default:
4904 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4905 }
4906
4907 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
4908 }
4909 }
4910 else
4911 {
4912 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
4913 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
4914 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
4915 }
4916
4917 return off;
4918}
4919
4920
4921/**
4922 * Allocates a temporary host SIMD register for keeping a guest
4923 * SIMD register value.
4924 *
4925 * Since we may already have a register holding the guest register value,
4926 * code will be emitted to do the loading if that's not the case. Code may also
4927 * be emitted if we have to free up a register to satisfy the request.
4928 *
4929 * @returns The host register number; throws VBox status code on failure, so no
4930 * need to check the return value.
4931 * @param pReNative The native recompile state.
4932 * @param poff Pointer to the variable with the code buffer
4933 * position. This will be updated if we need to move a
4934 * variable from register to stack in order to satisfy
4935 * the request.
4936 * @param enmGstSimdReg The guest SIMD register that is to be updated.
 * @param enmLoadSz Which part of the register needs to be valid (low/high 128 bits or the full 256 bits).
4937 * @param enmIntendedUse How the caller will be using the host register.
4938 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4939 * register is okay (default). The ASSUMPTION here is
4940 * that the caller has already flushed all volatile
4941 * registers, so this is only applied if we allocate a
4942 * new register.
4943 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4944 */
4945DECL_HIDDEN_THROW(uint8_t)
4946iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
4947 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4948 bool fNoVolatileRegs /*= false*/)
4949{
4950 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
4951#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
4952 AssertMsg( pReNative->idxCurCall == 0
4953 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4954 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4955 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4956 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4957 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
4958 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
4959#endif
4960#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4961 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4962#endif
4963 uint32_t const fRegMask = !fNoVolatileRegs
4964 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
4965 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4966
4967 /*
4968 * First check if the guest register value is already in a host register.
4969 */
4970 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
4971 {
4972 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4973 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4974 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
4975 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
4976
4977 /* It's not supposed to be allocated... */
4978 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
4979 {
4980 /*
4981 * If the register will trash the guest shadow copy, try find a
4982 * completely unused register we can use instead. If that fails,
4983 * we need to disassociate the host reg from the guest reg.
4984 */
4985 /** @todo would be nice to know if preserving the register is in any way helpful. */
4986 /* If the purpose is calculations, try duplicating the register value as
4987 we'll be clobbering the shadow. */
4988 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4989 && ( ~pReNative->Core.bmHstSimdRegs
4990 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4991 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
4992 {
4993 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
4994
4995 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
4996
4997 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
4998 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
4999 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5000 idxSimdReg = idxRegNew;
5001 }
5002 /* If the current register matches the restrictions, go ahead and allocate
5003 it for the caller. */
5004 else if (fRegMask & RT_BIT_32(idxSimdReg))
5005 {
5006 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5007 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5008 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5009 {
5010 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5011 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5012 else
5013 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5014 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5015 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5016 }
5017 else
5018 {
5019 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5020 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5021 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5022 }
5023 }
5024 /* Otherwise, allocate a register that satisfies the caller and transfer
5025 the shadowing if compatible with the intended use. (This basically
5026 means the call wants a non-volatile register (RSP push/pop scenario).) */
5027 else
5028 {
5029 Assert(fNoVolatileRegs);
5030 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5031 !fNoVolatileRegs
5032 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5033 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5034 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5035 {
5036 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5037 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5038 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5039 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5040 }
5041 else
5042 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5043 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5044 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5045 idxSimdReg = idxRegNew;
5046 }
5047 }
5048 else
5049 {
5050 /*
5051 * Oops. Shadowed guest register already allocated!
5052 *
5053 * Allocate a new register, copy the value and, if updating, the
5054 * guest shadow copy assignment to the new register.
5055 */
5056 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5057 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5058 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5059 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5060
5061 /** @todo share register for readonly access. */
5062 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5063 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5064
5065 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5066 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5067 else
5068 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5069
5070 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5071 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5072 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5073 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5074 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5075 else
5076 {
5077 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5078 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5079 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5080 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5081 }
5082 idxSimdReg = idxRegNew;
5083 }
5084 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5085
5086#ifdef VBOX_STRICT
5087 /* Strict builds: Check that the value is correct. */
5088 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5089 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5090#endif
5091
5092 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5093 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5094 {
5095# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5096 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5097 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5098# endif
5099
5100 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5101 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5102 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5103 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5104 else
5105 {
5106 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5107 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5108 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5109 }
5110 }
5111
5112 return idxSimdReg;
5113 }
5114
5115 /*
5116 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5117 */
5118 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5119
5120 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5121 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5122 else
5123 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5124
5125 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5126 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5127
5128 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5129 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5130 {
5131# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5132 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5133 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5134# endif
5135
5136 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5137 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5138 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5139 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5140 else
5141 {
5142 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5143 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5144 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5145 }
5146 }
5147
5148 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5149 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5150
5151 return idxRegNew;
5152}
5153
5154
5155/**
5156 * Flushes guest SIMD register shadow copies held by a set of host registers.
5157 *
5158 * This is used whenever calling an external helper to ensure that we don't carry on
5159 * with any guest shadows in volatile registers, as these will get clobbered by the callee.
5160 *
5161 * @param pReNative The native recompile state.
5162 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5163 */
5164DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5165{
5166 /*
5167 * Reduce the mask by what's currently shadowed.
5168 */
5169 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5170 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5171 if (fHstSimdRegs)
5172 {
5173 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5174 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5175 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5176 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5177 if (bmHstSimdRegsWithGstShadowNew)
5178 {
5179 /*
5180 * Partial (likely).
5181 */
5182 uint64_t fGstShadows = 0;
5183 do
5184 {
5185 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5186 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5187 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5188 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5189 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5190 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5191
5192 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5193 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5194 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5195 } while (fHstSimdRegs != 0);
5196 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5197 }
5198 else
5199 {
5200 /*
5201 * Clear all.
5202 */
5203 do
5204 {
5205 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5206 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5207 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5208 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5209 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5210 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5211
5212 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5213 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5214 } while (fHstSimdRegs != 0);
5215 pReNative->Core.bmGstSimdRegShadows = 0;
5216 }
5217 }
5218}
5219#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5220
5221
5222
5223/*********************************************************************************************************************************
5224* Code emitters for flushing pending guest register writes and sanity checks *
5225*********************************************************************************************************************************/
5226
5227#ifdef VBOX_STRICT
5228/**
5229 * Does internal register allocator sanity checks.
5230 */
5231DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5232{
5233 /*
5234 * Iterate host registers building a guest shadowing set.
5235 */
5236 uint64_t bmGstRegShadows = 0;
5237 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5238 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5239 while (bmHstRegsWithGstShadow)
5240 {
5241 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5242 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5243 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5244
5245 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5246 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5247 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5248 bmGstRegShadows |= fThisGstRegShadows;
5249 while (fThisGstRegShadows)
5250 {
5251 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5252 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5253 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5254 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5255 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5256 }
5257 }
5258 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5259 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5260 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5261
5262 /*
5263 * Now the other way around, checking the guest to host index array.
5264 */
5265 bmHstRegsWithGstShadow = 0;
5266 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5267 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5268 while (bmGstRegShadows)
5269 {
5270 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5271 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5272 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5273
5274 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5275 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5276 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5277 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5278 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5279 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5280 }
5281 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5282 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5283 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5284}
5285#endif /* VBOX_STRICT */
5286
5287
5288/**
5289 * Flushes any delayed guest register writes.
5290 *
5291 * This must be called prior to calling CImpl functions and any helpers that use
5292 * the guest state (like raising exceptions) and such.
5293 *
5294 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5295 * the caller if it wishes to do so.
5296 */
5297DECL_HIDDEN_THROW(uint32_t)
5298iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5299{
5300#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5301 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5302 off = iemNativeEmitPcWriteback(pReNative, off);
5303#else
5304 RT_NOREF(pReNative, fGstShwExcept);
5305#endif
5306
5307#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5308 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5309#endif
5310
5311#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5312 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5313#endif
5314
5315 return off;
5316}
5317
5318
5319#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5320/**
5321 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5322 */
5323DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5324{
5325 Assert(pReNative->Core.offPc);
5326# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5327 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5328 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5329# endif
5330
5331# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5332 /* Allocate a temporary PC register. */
5333 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5334
5335 /* Perform the addition and store the result. */
5336 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5337 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5338
5339 /* Free but don't flush the PC register. */
5340 iemNativeRegFreeTmp(pReNative, idxPcReg);
5341# else
5342 /* Compare the shadow with the context value, they should match. */
5343 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5344 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5345# endif
5346
5347 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5348 pReNative->Core.offPc = 0;
5349 pReNative->Core.cInstrPcUpdateSkipped = 0;
5350
5351 return off;
5352}
5353#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5354
5355
5356/*********************************************************************************************************************************
5357* Code Emitters (larger snippets) *
5358*********************************************************************************************************************************/
5359
5360/**
5361 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5362 * extending to 64-bit width.
5363 *
5364 * @returns New code buffer offset on success, UINT32_MAX on failure.
5365 * @param pReNative The native recompile state.
5366 * @param off The current code buffer position.
5367 * @param idxHstReg The host register to load the guest register value into.
5368 * @param enmGstReg The guest register to load.
5369 *
5370 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5371 * that is something the caller needs to do if applicable.
5372 */
5373DECL_HIDDEN_THROW(uint32_t)
5374iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5375{
5376 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5377 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5378
5379 switch (g_aGstShadowInfo[enmGstReg].cb)
5380 {
5381 case sizeof(uint64_t):
5382 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5383 case sizeof(uint32_t):
5384 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5385 case sizeof(uint16_t):
5386 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5387#if 0 /* not present in the table. */
5388 case sizeof(uint8_t):
5389 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5390#endif
5391 default:
5392 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5393 }
5394}
5395
5396
5397#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5398/**
5399 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5400 *
5401 * @returns New code buffer offset on success, UINT32_MAX on failure.
5402 * @param pReNative The recompiler state.
5403 * @param off The current code buffer position.
5404 * @param idxHstSimdReg The host register to load the guest register value into.
5405 * @param enmGstSimdReg The guest register to load.
5406 * @param enmLoadSz The load size of the register.
5407 *
5408 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5409 * that is something the caller needs to do if applicable.
5410 */
5411DECL_HIDDEN_THROW(uint32_t)
5412iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5413 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5414{
5415 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5416
5417 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5418 switch (enmLoadSz)
5419 {
5420 case kIemNativeGstSimdRegLdStSz_256:
5421 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5422 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5423 case kIemNativeGstSimdRegLdStSz_Low128:
5424 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5425 case kIemNativeGstSimdRegLdStSz_High128:
5426 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5427 default:
5428 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5429 }
5430}
5431#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5432
5433#ifdef VBOX_STRICT
5434
5435/**
5436 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5437 *
5438 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5439 * Trashes EFLAGS on AMD64.
5440 */
5441DECL_HIDDEN_THROW(uint32_t)
5442iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5443{
5444# ifdef RT_ARCH_AMD64
5445 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5446
5447 /* rol reg64, 32 */
5448 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5449 pbCodeBuf[off++] = 0xc1;
5450 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5451 pbCodeBuf[off++] = 32;
5452
5453 /* test reg32, ffffffffh */
5454 if (idxReg >= 8)
5455 pbCodeBuf[off++] = X86_OP_REX_B;
5456 pbCodeBuf[off++] = 0xf7;
5457 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5458 pbCodeBuf[off++] = 0xff;
5459 pbCodeBuf[off++] = 0xff;
5460 pbCodeBuf[off++] = 0xff;
5461 pbCodeBuf[off++] = 0xff;
5462
5463 /* je/jz +1 */
5464 pbCodeBuf[off++] = 0x74;
5465 pbCodeBuf[off++] = 0x01;
5466
5467 /* int3 */
5468 pbCodeBuf[off++] = 0xcc;
5469
5470 /* rol reg64, 32 */
5471 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5472 pbCodeBuf[off++] = 0xc1;
5473 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5474 pbCodeBuf[off++] = 32;
5475
5476# elif defined(RT_ARCH_ARM64)
5477 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5478 /* lsr tmp0, reg64, #32 */
5479 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5480 /* cbz tmp0, +1 */
5481 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5482 /* brk #0x1100 */
5483 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5484
5485# else
5486# error "Port me!"
5487# endif
5488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5489 return off;
5490}
5491
5492
5493/**
5494 * Emits code that checks that the content of register @a idxReg is the same
5495 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5496 * instruction if that's not the case.
5497 *
5498 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5499 * Trashes EFLAGS on AMD64.
5500 */
5501DECL_HIDDEN_THROW(uint32_t)
5502iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5503{
5504#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5505 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5506 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5507 return off;
5508#endif
5509
5510# ifdef RT_ARCH_AMD64
5511 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5512
5513 /* cmp reg, [mem] */
5514 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5515 {
5516 if (idxReg >= 8)
5517 pbCodeBuf[off++] = X86_OP_REX_R;
5518 pbCodeBuf[off++] = 0x38;
5519 }
5520 else
5521 {
5522 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5523 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5524 else
5525 {
5526 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5527 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5528 else
5529 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5530 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5531 if (idxReg >= 8)
5532 pbCodeBuf[off++] = X86_OP_REX_R;
5533 }
5534 pbCodeBuf[off++] = 0x39;
5535 }
5536 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5537
5538 /* je/jz +1 */
5539 pbCodeBuf[off++] = 0x74;
5540 pbCodeBuf[off++] = 0x01;
5541
5542 /* int3 */
5543 pbCodeBuf[off++] = 0xcc;
5544
5545 /* For values smaller than the register size, we must check that the rest
5546 of the register is all zeros. */
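    /* For an 8-bit guest register the test immediate below works out to 0xffffff00, for a
       16-bit one it is 0xffff0000 (see the bytes emitted below). */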
5547 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5548 {
5549 /* test reg64, imm32 */
5550 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5551 pbCodeBuf[off++] = 0xf7;
5552 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5553 pbCodeBuf[off++] = 0;
5554 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5555 pbCodeBuf[off++] = 0xff;
5556 pbCodeBuf[off++] = 0xff;
5557
5558 /* je/jz +1 */
5559 pbCodeBuf[off++] = 0x74;
5560 pbCodeBuf[off++] = 0x01;
5561
5562 /* int3 */
5563 pbCodeBuf[off++] = 0xcc;
5564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5565 }
5566 else
5567 {
5568 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5569 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5570 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5571 }
5572
5573# elif defined(RT_ARCH_ARM64)
5574 /* mov TMP0, [gstreg] */
5575 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5576
5577 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5578 /* sub tmp0, tmp0, idxReg */
5579 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5580 /* cbz tmp0, +1 */
5581 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5582 /* brk #0x1000+enmGstReg */
5583 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5584 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5585
5586# else
5587# error "Port me!"
5588# endif
5589 return off;
5590}
5591
5592
5593# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5594# ifdef RT_ARCH_AMD64
5595/**
5596 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
5597 */
5598DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5599{
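    /* The PCMPEQQ below sets each 64-bit lane to all ones on equality, so both PEXTRQ'd
       qwords must compare equal to -1 (0xffffffffffffffff); otherwise we hit the int3. */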
5600 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5601 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5602 if (idxSimdReg >= 8)
5603 pbCodeBuf[off++] = X86_OP_REX_R;
5604 pbCodeBuf[off++] = 0x0f;
5605 pbCodeBuf[off++] = 0x38;
5606 pbCodeBuf[off++] = 0x29;
5607 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5608
5609 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5610 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5611 pbCodeBuf[off++] = X86_OP_REX_W
5612 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5613 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5614 pbCodeBuf[off++] = 0x0f;
5615 pbCodeBuf[off++] = 0x3a;
5616 pbCodeBuf[off++] = 0x16;
5617 pbCodeBuf[off++] = 0xeb;
5618 pbCodeBuf[off++] = 0x00;
5619
5620 /* cmp tmp0, 0xffffffffffffffff. */
5621 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5622 pbCodeBuf[off++] = 0x83;
5623 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5624 pbCodeBuf[off++] = 0xff;
5625
5626 /* je/jz +1 */
5627 pbCodeBuf[off++] = 0x74;
5628 pbCodeBuf[off++] = 0x01;
5629
5630 /* int3 */
5631 pbCodeBuf[off++] = 0xcc;
5632
5633 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5634 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5635 pbCodeBuf[off++] = X86_OP_REX_W
5636 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5637 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5638 pbCodeBuf[off++] = 0x0f;
5639 pbCodeBuf[off++] = 0x3a;
5640 pbCodeBuf[off++] = 0x16;
5641 pbCodeBuf[off++] = 0xeb;
5642 pbCodeBuf[off++] = 0x01;
5643
5644 /* cmp tmp0, 0xffffffffffffffff. */
5645 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5646 pbCodeBuf[off++] = 0x83;
5647 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5648 pbCodeBuf[off++] = 0xff;
5649
5650 /* je/jz +1 */
5651 pbCodeBuf[off++] = 0x74;
5652 pbCodeBuf[off++] = 0x01;
5653
5654 /* int3 */
5655 pbCodeBuf[off++] = 0xcc;
5656
5657 return off;
5658}
5659# endif
5660
5661
5662/**
5663 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
5664 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5665 * instruction if that's not the case.
5666 *
5667 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5668 * Trashes EFLAGS on AMD64.
5669 */
5670DECL_HIDDEN_THROW(uint32_t)
5671iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5672 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5673{
5674    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5675 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5676 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5677 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5678 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5679 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5680 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5681 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5682 return off;
5683
5684# ifdef RT_ARCH_AMD64
5685 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5686 {
5687 /* movdqa vectmp0, idxSimdReg */
5688 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5689
5690 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5691
5692 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5693 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5694 }
5695
5696 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5697 {
5698        /* Due to the fact that CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5699 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5700
5701 /* vextracti128 vectmp0, idxSimdReg, 1 */
5702 pbCodeBuf[off++] = X86_OP_VEX3;
5703 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5704 | X86_OP_VEX3_BYTE1_X
5705 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5706 | 0x03; /* Opcode map */
5707 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5708 pbCodeBuf[off++] = 0x39;
5709 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5710 pbCodeBuf[off++] = 0x01;
5711
5712 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5713 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5714 }
5715# elif defined(RT_ARCH_ARM64)
5716 /* mov vectmp0, [gstreg] */
5717 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5718
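    /* Strategy: XOR the expected value (loaded above) with the host SIMD register; on a match
       the result is all zero, so the UADDLV byte sum is zero and the CBZ skips the BRK. */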
5719 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5720 {
5721 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5722 /* eor vectmp0, vectmp0, idxSimdReg */
5723 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5724 /* uaddlv vectmp0, vectmp0.16B */
5725 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
5726 /* umov tmp0, vectmp0.H[0] */
5727 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5728 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5729 /* cbz tmp0, +1 */
5730 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5731        /* brk #0x1000 + enmGstSimdReg */
5732 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5733 }
5734
5735 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5736 {
5737 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5738 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
5739 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
5740 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
5741 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
5742 /* umov tmp0, (vectmp0 + 1).H[0] */
5743 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
5744 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5745 /* cbz tmp0, +1 */
5746 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5747        /* brk #0x1000 + enmGstSimdReg */
5748 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5749 }
5750
5751# else
5752# error "Port me!"
5753# endif
5754
5755 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5756 return off;
5757}
5758# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5759
5760
5761/**
5762 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
5763 * important bits.
5764 *
5765 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5766 * Trashes EFLAGS on AMD64.
5767 */
5768DECL_HIDDEN_THROW(uint32_t)
5769iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5770{
5771 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5772 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5773 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5774 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5775
5776#ifdef RT_ARCH_AMD64
5777 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5778
5779 /* je/jz +1 */
5780 pbCodeBuf[off++] = 0x74;
5781 pbCodeBuf[off++] = 0x01;
5782
5783 /* int3 */
5784 pbCodeBuf[off++] = 0xcc;
5785
5786# elif defined(RT_ARCH_ARM64)
5787 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5788
5789 /* b.eq +1 */
5790 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5791 /* brk #0x2000 */
5792 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5793
5794# else
5795# error "Port me!"
5796# endif
5797 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5798
5799 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5800 return off;
5801}
5802
5803#endif /* VBOX_STRICT */
5804
5805
5806#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5807/**
5808 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
5809 */
5810DECL_HIDDEN_THROW(uint32_t)
5811iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
5812{
5813 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
5814
5815 fEflNeeded &= X86_EFL_STATUS_BITS;
5816 if (fEflNeeded)
5817 {
5818# ifdef RT_ARCH_AMD64
5819 /* test dword [pVCpu + offVCpu], imm32 */
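        /* Use the shorter byte form (0xf6, imm8) when all the needed status flags fit in the
           low 8 bits; otherwise fall back to the dword form (0xf7) with a full imm32. */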
5820 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5821 if (fEflNeeded <= 0xff)
5822 {
5823 pCodeBuf[off++] = 0xf6;
5824 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5825 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5826 }
5827 else
5828 {
5829 pCodeBuf[off++] = 0xf7;
5830 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5831 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5832 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
5833 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
5834 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
5835 }
5836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5837
5838# else
5839 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5840 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
5841 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
5842# ifdef RT_ARCH_ARM64
5843 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
5844 off = iemNativeEmitBrk(pReNative, off, 0x7777);
5845# else
5846# error "Port me!"
5847# endif
5848 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5849# endif
5850 }
5851 return off;
5852}
5853#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
5854
5855
5856/**
5857 * Emits a code for checking the return code of a call and rcPassUp, returning
5858 * from the code if either are non-zero.
5859 */
5860DECL_HIDDEN_THROW(uint32_t)
5861iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5862{
5863#ifdef RT_ARCH_AMD64
5864 /*
5865 * AMD64: eax = call status code.
5866 */
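    /* Both the call status (eax) and rcPassUp are zero (VINF_SUCCESS) in the common case,
       so a single OR + JNZ covers both checks on the hot path. */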
5867
5868 /* edx = rcPassUp */
5869 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5870# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5871 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5872# endif
5873
5874 /* edx = eax | rcPassUp */
5875 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5876 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5877 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5878 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5879
5880 /* Jump to non-zero status return path. */
5881 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5882
5883 /* done. */
5884
5885#elif RT_ARCH_ARM64
5886 /*
5887 * ARM64: w0 = call status code.
5888 */
5889# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5890 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5891# endif
5892 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5893
5894 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5895
5896 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5897
5898 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5899 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5900 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5901
5902#else
5903# error "port me"
5904#endif
5905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5906 RT_NOREF_PV(idxInstr);
5907 return off;
5908}
5909
5910
5911/**
5912 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5913 * raising a \#GP(0) if it isn't.
5914 *
5915 * @returns New code buffer offset; throws VBox status code on error.
5916 * @param pReNative The native recompile state.
5917 * @param off The code buffer offset.
5918 * @param idxAddrReg The host register with the address to check.
5919 * @param idxInstr The current instruction.
5920 */
5921DECL_HIDDEN_THROW(uint32_t)
5922iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5923{
5924 /*
5925 * Make sure we don't have any outstanding guest register writes as we may
5926     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5927 */
5928 off = iemNativeRegFlushPendingWrites(pReNative, off);
5929
5930#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5931 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5932#else
5933 RT_NOREF(idxInstr);
5934#endif
5935
5936#ifdef RT_ARCH_AMD64
5937 /*
5938 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5939 * return raisexcpt();
5940     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5941 */
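    /* E.g. canonical 0xffff800000000000: hi32 = 0xffff8000, +0x8000 wraps to 0, >>16 gives 0 -> no fault;
       non-canonical 0x0000800000000000: hi32 = 0x00008000, +0x8000 = 0x10000, >>16 gives 1 -> #GP(0). */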
5942 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5943
5944 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5945 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5946 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5947 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5948 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5949
5950 iemNativeRegFreeTmp(pReNative, iTmpReg);
5951
5952#elif defined(RT_ARCH_ARM64)
5953 /*
5954 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5955 * return raisexcpt();
5956 * ----
5957 * mov x1, 0x800000000000
5958 * add x1, x0, x1
5959 * cmp xzr, x1, lsr 48
5960 * b.ne .Lraisexcpt
5961 */
5962 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5963
5964 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5965 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5966 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5967 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5968
5969 iemNativeRegFreeTmp(pReNative, iTmpReg);
5970
5971#else
5972# error "Port me"
5973#endif
5974 return off;
5975}
5976
5977
5978/**
5979 * Emits code to check that the content of @a idxAddrReg is within the limit
5980 * of CS, raising a \#GP(0) if it isn't.
5981 *
5982 * @returns New code buffer offset; throws VBox status code on error.
5983 * @param pReNative The native recompile state.
5984 * @param off The code buffer offset.
5985 * @param idxAddrReg The host register (32-bit) with the address to
5986 * check.
5987 * @param idxInstr The current instruction.
5988 */
5989DECL_HIDDEN_THROW(uint32_t)
5990iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5991 uint8_t idxAddrReg, uint8_t idxInstr)
5992{
5993 /*
5994 * Make sure we don't have any outstanding guest register writes as we may
5995     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5996 */
5997 off = iemNativeRegFlushPendingWrites(pReNative, off);
5998
5999#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6000 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6001#else
6002 RT_NOREF(idxInstr);
6003#endif
6004
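    /* Compare the 32-bit address against the shadowed CS limit and jump to the RaiseGp0
       label if it is above the limit (unsigned compare). */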
6005 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6006 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6007 kIemNativeGstRegUse_ReadOnly);
6008
6009 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6010 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6011
6012 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6013 return off;
6014}
6015
6016
6017/**
6018 * Emits a call to a CImpl function or something similar.
6019 */
6020DECL_HIDDEN_THROW(uint32_t)
6021iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6022 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6023{
6024 /* Writeback everything. */
6025 off = iemNativeRegFlushPendingWrites(pReNative, off);
6026
6027 /*
6028     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6029 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6030 */
6031 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6032 fGstShwFlush
6033 | RT_BIT_64(kIemNativeGstReg_Pc)
6034 | RT_BIT_64(kIemNativeGstReg_EFlags));
6035 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6036
6037 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6038
6039 /*
6040 * Load the parameters.
6041 */
6042#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6043    /* Special-case the hidden VBOXSTRICTRC pointer. */
6044 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6045 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6046 if (cAddParams > 0)
6047 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6048 if (cAddParams > 1)
6049 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6050 if (cAddParams > 2)
6051 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6052 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6053
6054#else
6055 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6056 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6057 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6058 if (cAddParams > 0)
6059 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6060 if (cAddParams > 1)
6061 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6062 if (cAddParams > 2)
6063# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6064 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6065# else
6066 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6067# endif
6068#endif
6069
6070 /*
6071 * Make the call.
6072 */
6073 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6074
6075#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6076 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6077#endif
6078
6079 /*
6080 * Check the status code.
6081 */
6082 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6083}
6084
6085
6086/**
6087 * Emits a call to a threaded worker function.
6088 */
6089DECL_HIDDEN_THROW(uint32_t)
6090iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6091{
6092 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6093
6094 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6095 off = iemNativeRegFlushPendingWrites(pReNative, off);
6096
6097 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6098 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6099
6100#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6101 /* The threaded function may throw / long jmp, so set current instruction
6102 number if we're counting. */
6103 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6104#endif
6105
6106 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6107
6108#ifdef RT_ARCH_AMD64
6109 /* Load the parameters and emit the call. */
6110# ifdef RT_OS_WINDOWS
6111# ifndef VBOXSTRICTRC_STRICT_ENABLED
6112 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6113 if (cParams > 0)
6114 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6115 if (cParams > 1)
6116 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6117 if (cParams > 2)
6118 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6119# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6120 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6121 if (cParams > 0)
6122 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6123 if (cParams > 1)
6124 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6125 if (cParams > 2)
6126 {
6127 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6128 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6129 }
6130 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6131# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6132# else
6133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6134 if (cParams > 0)
6135 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6136 if (cParams > 1)
6137 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6138 if (cParams > 2)
6139 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6140# endif
6141
6142 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6143
6144# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6145 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6146# endif
6147
6148#elif RT_ARCH_ARM64
6149 /*
6150 * ARM64:
6151 */
6152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6153 if (cParams > 0)
6154 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6155 if (cParams > 1)
6156 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6157 if (cParams > 2)
6158 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6159
6160 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6161
6162#else
6163# error "port me"
6164#endif
6165
6166 /*
6167 * Check the status code.
6168 */
6169 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6170
6171 return off;
6172}
6173
6174#ifdef VBOX_WITH_STATISTICS
6175/**
6176 * Emits code to update the thread call statistics.
6177 */
6178DECL_INLINE_THROW(uint32_t)
6179iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6180{
6181 /*
6182 * Update threaded function stats.
6183 */
6184 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6185 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6186# if defined(RT_ARCH_ARM64)
6187 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6188 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6189 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6190 iemNativeRegFreeTmp(pReNative, idxTmp1);
6191 iemNativeRegFreeTmp(pReNative, idxTmp2);
6192# else
6193 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6194# endif
6195 return off;
6196}
6197#endif /* VBOX_WITH_STATISTICS */
6198
6199
6200/**
6201 * Emits the code at the ReturnWithFlags label (returns
6202 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6203 */
6204static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6205{
6206 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6207 if (idxLabel != UINT32_MAX)
6208 {
6209 iemNativeLabelDefine(pReNative, idxLabel, off);
6210
6211 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6212
6213 /* jump back to the return sequence. */
6214 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6215 }
6216 return off;
6217}
6218
6219
6220/**
6221 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6222 */
6223static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6224{
6225 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6226 if (idxLabel != UINT32_MAX)
6227 {
6228 iemNativeLabelDefine(pReNative, idxLabel, off);
6229
6230 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6231
6232 /* jump back to the return sequence. */
6233 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6234 }
6235 return off;
6236}
6237
6238
6239/**
6240 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6241 */
6242static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6243{
6244 /*
6245 * Generate the rc + rcPassUp fiddling code if needed.
6246 */
6247 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6248 if (idxLabel != UINT32_MAX)
6249 {
6250 iemNativeLabelDefine(pReNative, idxLabel, off);
6251
6252 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6253#ifdef RT_ARCH_AMD64
6254# ifdef RT_OS_WINDOWS
6255# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6256 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6257# endif
6258 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6259 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6260# else
6261 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6262 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6263# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6264 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6265# endif
6266# endif
6267# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6268 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6269# endif
6270
6271#else
6272 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6273 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6274 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6275#endif
6276
6277 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6278 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6279 }
6280 return off;
6281}
6282
6283
6284/**
6285 * Emits a standard epilog.
6286 */
6287static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6288{
6289 *pidxReturnLabel = UINT32_MAX;
6290
6291 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6292 off = iemNativeRegFlushPendingWrites(pReNative, off);
6293
6294 /*
6295 * Successful return, so clear the return register (eax, w0).
6296 */
6297 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
6298
6299 /*
6300 * Define label for common return point.
6301 */
6302 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6303 *pidxReturnLabel = idxReturn;
6304
6305 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6306
6307 /*
6308 * Restore registers and return.
6309 */
6310#ifdef RT_ARCH_AMD64
6311 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6312
6313 /* Reposition esp at the r15 restore point. */
6314 pbCodeBuf[off++] = X86_OP_REX_W;
6315 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6316 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6317 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6318
6319 /* Pop non-volatile registers and return */
6320 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6321 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6322 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6323 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6324 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6325 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6326 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6327 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6328# ifdef RT_OS_WINDOWS
6329 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6330 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6331# endif
6332 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6333 pbCodeBuf[off++] = 0xc9; /* leave */
6334 pbCodeBuf[off++] = 0xc3; /* ret */
6335 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6336
6337#elif RT_ARCH_ARM64
6338 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6339
6340    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6341 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6342 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6343 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6344 IEMNATIVE_FRAME_VAR_SIZE / 8);
6345 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6347 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6348 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6349 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6350 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6351 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6352 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6353 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6354 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6355 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6356 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6357
6358 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6359 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6360 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6361 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6362
6363 /* retab / ret */
6364# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6365 if (1)
6366 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6367 else
6368# endif
6369 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6370
6371#else
6372# error "port me"
6373#endif
6374 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6375
6376 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6377}
6378
6379
6380/**
6381 * Emits a standard prolog.
6382 */
6383static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6384{
6385#ifdef RT_ARCH_AMD64
6386 /*
6387 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6388 * reserving 64 bytes for stack variables plus 4 non-register argument
6389     * slots. Fixed register assignment: xBX = pVCpu;
6390 *
6391 * Since we always do the same register spilling, we can use the same
6392 * unwind description for all the code.
6393 */
6394 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6395 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6396 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6397 pbCodeBuf[off++] = 0x8b;
6398 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6399 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6400 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6401# ifdef RT_OS_WINDOWS
6402 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6403 pbCodeBuf[off++] = 0x8b;
6404 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6405 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6406 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6407# else
6408 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6409 pbCodeBuf[off++] = 0x8b;
6410 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6411# endif
6412 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6413 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6414 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6415 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6416 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6417 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6418 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6419 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6420
6421# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6422 /* Save the frame pointer. */
6423 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6424# endif
6425
6426 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6427 X86_GREG_xSP,
6428 IEMNATIVE_FRAME_ALIGN_SIZE
6429 + IEMNATIVE_FRAME_VAR_SIZE
6430 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6431 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6432 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6433 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6434 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6435
6436#elif RT_ARCH_ARM64
6437 /*
6438 * We set up a stack frame exactly like on x86, only we have to push the
6439     * return address ourselves here. We save all non-volatile registers.
6440 */
6441 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6442
6443 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6444      * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6445      * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6446      * in any way conditional, so just emit this instruction now and hope for the best... */
6447 /* pacibsp */
6448 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6449# endif
6450
6451 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6452 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6453 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6454 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6455 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6456 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6457 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6458 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6459 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6460 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6461 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6462 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6463 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6464 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6465 /* Save the BP and LR (ret address) registers at the top of the frame. */
6466 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6467 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6468 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6469 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6470 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6471 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6472
6473 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6474 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6475
6476 /* mov r28, r0 */
6477 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6478 /* mov r27, r1 */
6479 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6480
6481# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6482 /* Save the frame pointer. */
6483 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6484 ARMV8_A64_REG_X2);
6485# endif
6486
6487#else
6488# error "port me"
6489#endif
6490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6491 return off;
6492}
6493
6494
6495/*********************************************************************************************************************************
6496* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6497*********************************************************************************************************************************/
6498
6499/**
6500 * Internal work that allocates a variable with kind set to
6501 * kIemNativeVarKind_Invalid and no current stack allocation.
6502 *
6503 * The kind will either be set by the caller or later when the variable is first
6504 * assigned a value.
6505 *
6506 * @returns Unpacked index.
6507 * @internal
6508 */
6509static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6510{
6511 Assert(cbType > 0 && cbType <= 64);
6512 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6513 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6514 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6515 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6516 pReNative->Core.aVars[idxVar].cbVar = cbType;
6517 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6518 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6519 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6520 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6521 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6522 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6523 pReNative->Core.aVars[idxVar].u.uValue = 0;
6524#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6525 pReNative->Core.aVars[idxVar].fSimdReg = false;
6526#endif
6527 return idxVar;
6528}
6529
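/* Typical lifecycle of such a variable: iemNativeVarAlloc() leaves the kind as Invalid; the
   kind is then set by one of the iemNativeVarSetKindToXxx() workers below, or defaults to
   Stack on the first iemNativeVarRegisterAcquire(). */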
6530
6531/**
6532 * Internal work that allocates an argument variable w/o setting enmKind.
6533 *
6534 * @returns Unpacked index.
6535 * @internal
6536 */
6537static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6538{
6539 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6540 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6541 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6542
6543 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6544 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6545 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6546 return idxVar;
6547}
6548
6549
6550/**
6551 * Gets the stack slot for a stack variable, allocating one if necessary.
6552 *
6553 * Calling this function implies that the stack slot will contain a valid
6554 * variable value. The caller deals with any register currently assigned to the
6555 * variable, typically by spilling it into the stack slot.
6556 *
6557 * @returns The stack slot number.
6558 * @param pReNative The recompiler state.
6559 * @param idxVar The variable.
6560 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6561 */
6562DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6563{
6564 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6565 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6566 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6567
6568 /* Already got a slot? */
6569 uint8_t const idxStackSlot = pVar->idxStackSlot;
6570 if (idxStackSlot != UINT8_MAX)
6571 {
6572 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6573 return idxStackSlot;
6574 }
6575
6576 /*
6577 * A single slot is easy to allocate.
6578 * Allocate them from the top end, closest to BP, to reduce the displacement.
6579 */
6580 if (pVar->cbVar <= sizeof(uint64_t))
6581 {
6582 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6583 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6584 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6585 pVar->idxStackSlot = (uint8_t)iSlot;
6586 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6587 return (uint8_t)iSlot;
6588 }
6589
6590 /*
6591 * We need more than one stack slot.
6592 *
6593 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6594 */
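    /* E.g. a 32 byte variable: fBitAlignMask = 3 and fBitAllocMask = 0xf, so the loop below
       searches downwards from the topmost free slot for a naturally aligned run of four free slots. */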
6595 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6596 Assert(pVar->cbVar <= 64);
6597 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6598 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6599 uint32_t bmStack = pReNative->Core.bmStack;
6600 while (bmStack != UINT32_MAX)
6601 {
6602 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6603 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6604 iSlot = (iSlot - 1) & ~fBitAlignMask;
6605 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6606 {
6607 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6608 pVar->idxStackSlot = (uint8_t)iSlot;
6609 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6610 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6611 return (uint8_t)iSlot;
6612 }
6613
6614 bmStack |= (fBitAllocMask << iSlot);
6615 }
6616 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6617}
6618
6619
6620/**
6621 * Changes the variable to a stack variable.
6622 *
6623 * Currently this is only possible to do the first time the variable is used;
6624 * switching later can be implemented but hasn't been done.
6625 *
6626 * @param pReNative The recompiler state.
6627 * @param idxVar The variable.
6628 * @throws VERR_IEM_VAR_IPE_2
6629 */
6630DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6631{
6632 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6633 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6634 if (pVar->enmKind != kIemNativeVarKind_Stack)
6635 {
6636 /* We could in theory transition from immediate to stack as well, but it
6637 would involve the caller doing work storing the value on the stack. So,
6638 till that's required we only allow transition from invalid. */
6639 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6640 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6641 pVar->enmKind = kIemNativeVarKind_Stack;
6642
6643 /* Note! We don't allocate a stack slot here, that's only done when a
6644 slot is actually needed to hold a variable value. */
6645 }
6646}
6647
6648
6649/**
6650 * Sets the variable to a constant (immediate) value.
6651 *
6652 * This does not require stack storage as we know the value and can always
6653 * reload it, unless of course it's referenced.
6654 *
6655 * @param pReNative The recompiler state.
6656 * @param idxVar The variable.
6657 * @param uValue The immediate value.
6658 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6659 */
6660DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6661{
6662 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6663 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6664 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6665 {
6666 /* Only simple transitions for now. */
6667 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6668 pVar->enmKind = kIemNativeVarKind_Immediate;
6669 }
6670 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6671
6672 pVar->u.uValue = uValue;
6673 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
6674 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
6675 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
6676}
6677
6678
6679/**
6680 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6681 *
6682 * This does not require stack storage as we know the value and can always
6683 * reload it. Loading is postponed till needed.
6684 *
6685 * @param pReNative The recompiler state.
6686 * @param idxVar The variable. Unpacked.
6687 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
6688 *
6689 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6690 * @internal
6691 */
6692static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6693{
6694 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6695 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6696
6697 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6698 {
6699 /* Only simple transitions for now. */
6700 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6701 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6702 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6703 }
6704 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6705
6706 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
6707
6708 /* Update the other variable, ensure it's a stack variable. */
6709 /** @todo handle variables with const values... that'll go boom now. */
6710 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6711 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
6712}
6713
6714
6715/**
6716 * Sets the variable to a reference (pointer) to a guest register reference.
6717 *
6718 * This does not require stack storage as we know the value and can always
6719 * reload it. Loading is postponed till needed.
6720 *
6721 * @param pReNative The recompiler state.
6722 * @param idxVar The variable.
6723 * @param enmRegClass The class guest registers to reference.
6724 * @param idxReg The register within @a enmRegClass to reference.
6725 *
6726 * @throws VERR_IEM_VAR_IPE_2
6727 */
6728DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6729 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6730{
6731 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6732 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6733
6734 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
6735 {
6736 /* Only simple transitions for now. */
6737 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6738 pVar->enmKind = kIemNativeVarKind_GstRegRef;
6739 }
6740 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6741
6742 pVar->u.GstRegRef.enmClass = enmRegClass;
6743 pVar->u.GstRegRef.idx = idxReg;
6744}
6745
6746
6747DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6748{
6749 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6750}
6751
6752
6753DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6754{
6755 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6756
6757    /* Since we're using a generic uint64_t value type, we must truncate it if
6758       the variable is smaller, otherwise we may end up with a too large value when
6759       scaling up an imm8 w/ sign-extension.
6760
6761 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6762       in the bios, bx=1) when running on arm, because clang expects 16-bit
6763 register parameters to have bits 16 and up set to zero. Instead of
6764 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6765 CF value in the result. */
6766 switch (cbType)
6767 {
6768 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6769 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6770 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6771 }
6772 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6773 return idxVar;
6774}
6775
6776
6777DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6778{
6779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
6780 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
6781 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6782 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6783 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6784 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6785
6786 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6787 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
6788 return idxArgVar;
6789}
6790
6791
6792DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6793{
6794 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6795 /* Don't set to stack now, leave that to the first use as for instance
6796 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6797 return idxVar;
6798}
6799
6800
6801DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6802{
6803 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6804
6805    /* Since we're using a generic uint64_t value type, we must truncate it if
6806       the variable is smaller, otherwise we may end up with a too large value when
6807       scaling up an imm8 w/ sign-extension. */
6808 switch (cbType)
6809 {
6810 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6811 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6812 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6813 }
6814 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6815 return idxVar;
6816}
6817
6818
6819DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
6820{
6821 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6822 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6823
6824 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
6825 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
6826
6827 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
6828
6829    /* Truncate the value to this variable's size. */
6830 switch (cbType)
6831 {
6832 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
6833 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
6834 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
6835 }
6836
6837 iemNativeVarRegisterRelease(pReNative, idxVarOther);
6838 iemNativeVarRegisterRelease(pReNative, idxVar);
6839 return idxVar;
6840}
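
/* Illustrative sketch (not part of the original source): how a caller might use the
   allocate-and-assign helper above to obtain an independent 16-bit copy of an existing
   variable; idxVarSrc and off are assumed to come from the surrounding emitter code:

        uint8_t const idxVarCopy = iemNativeVarAllocAssign(pReNative, &off, sizeof(uint16_t), idxVarSrc);
        // idxVarCopy is a stack-kind variable holding the low 16 bits of idxVarSrc;
        // the helper emitted the register-to-register copy and the AND mask above.
 */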
6841
6842
6843/**
6844 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6845 * fixed till we call iemNativeVarRegisterRelease.
6846 *
6847 * @returns The host register number.
6848 * @param pReNative The recompiler state.
6849 * @param idxVar The variable.
6850 * @param poff Pointer to the instruction buffer offset.
6851 * In case a register needs to be freed up or the value
6852 * loaded off the stack.
6853 * @param fInitialized Set if the variable must already have been initialized.
6854 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6855 * the case.
6856 * @param idxRegPref Preferred register number or UINT8_MAX.
6857 */
6858DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6859 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6860{
6861 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6862 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6863 Assert(pVar->cbVar <= 8);
6864 Assert(!pVar->fRegAcquired);
6865
6866 uint8_t idxReg = pVar->idxReg;
6867 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6868 {
6869 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6870 && pVar->enmKind < kIemNativeVarKind_End);
6871 pVar->fRegAcquired = true;
6872 return idxReg;
6873 }
6874
6875 /*
6876 * If the kind of variable has not yet been set, default to 'stack'.
6877 */
6878 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6879 && pVar->enmKind < kIemNativeVarKind_End);
6880 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6881 iemNativeVarSetKindToStack(pReNative, idxVar);
6882
6883 /*
6884 * We have to allocate a register for the variable, even if it's a stack one,
6885 * as we don't know if there are modifications being made to it before it's
6886 * finalized (todo: analyze and insert hints about that?).
6887 *
6888 * If we can, we try to get the correct register for argument variables. This
6889 * is assuming that most argument variables are fetched as close as possible
6890 * to the actual call, so that there aren't any interfering hidden calls
6891 * (memory accesses, etc) in between.
6892 *
6893 * If we cannot, or it's a regular (non-argument) variable, we make sure no
6894 * argument registers that will be used by this MC block will be allocated
6895 * here, and we always prefer non-volatile registers to avoid needing to
6896 * spill stuff for internal calls.
6897 */
6898 /** @todo Detect too early argument value fetches in the python script and
6899 * warn about hidden calls causing less optimal code to be generated. */
6900
6901 uint8_t const uArgNo = pVar->uArgNo;
6902 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6903 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6904 {
6905 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6906
6907#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6908 /* Writeback any dirty shadow registers we are about to unshadow. */
6909 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
6910#endif
6911
6912 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6913 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6914 }
6915 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6916 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6917 {
6918 /** @todo there must be a better way for this, and for cArgsX to boot? */
6919 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6920 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6921 & ~pReNative->Core.bmHstRegsWithGstShadow
6922 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6923 & fNotArgsMask;
6924 if (fRegs)
6925 {
6926 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6927 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6928 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6929 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6930 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6931 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6932 }
6933 else
6934 {
6935 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6936 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6937 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6938 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6939 }
6940 }
6941 else
6942 {
6943 idxReg = idxRegPref;
6944 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6945 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
6946 }
6947 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6948 pVar->idxReg = idxReg;
6949
6950#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6951 pVar->fSimdReg = false;
6952#endif
6953
6954 /*
6955 * Load it off the stack if we've got a stack slot.
6956 */
6957 uint8_t const idxStackSlot = pVar->idxStackSlot;
6958 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6959 {
6960 Assert(fInitialized);
6961 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6962 switch (pVar->cbVar)
6963 {
6964 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6965 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6966 case 3: AssertFailed(); RT_FALL_THRU();
6967 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6968 default: AssertFailed(); RT_FALL_THRU();
6969 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6970 }
6971 }
6972 else
6973 {
6974 Assert(idxStackSlot == UINT8_MAX);
6975 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6976 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6977 else
6978 {
6979 /*
6980 * Convert from immediate to stack/register. This is currently only
6981 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
6982 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
6983 */
6984 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6985 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
6986 idxVar, idxReg, pVar->u.uValue));
6987 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
6988 pVar->enmKind = kIemNativeVarKind_Stack;
6989 }
6990 }
6991
6992 pVar->fRegAcquired = true;
6993 return idxReg;
6994}
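
/* Illustrative sketch (not part of the original source): the typical acquire/release
   bracket used by the MC emitters; idxVar and off are assumed to come from the
   surrounding emitter code:

        uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off,
                                                           true);   // fInitialized
        off = iemNativeEmitLoadGprImm64(pReNative, off, idxReg, 0); // ...work on idxReg...
        iemNativeVarRegisterRelease(pReNative, idxVar);
        // Between acquire and release the host register stays pinned to the variable,
        // so other allocation requests cannot steal it while the emitter works on it.
 */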
6995
6996
6997#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6998/**
6999 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7000 * fixed till we call iemNativeVarRegisterRelease.
7001 *
7002 * @returns The host register number.
7003 * @param pReNative The recompiler state.
7004 * @param idxVar The variable.
7005 * @param poff Pointer to the instruction buffer offset.
7006 * In case a register needs to be freed up or the value
7007 * loaded off the stack.
7008 * @param fInitialized Set if the variable must already have been initialized.
7009 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7010 * the case.
7011 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7012 */
7013DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7014 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7015{
7016 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7017 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7018 Assert( pVar->cbVar == sizeof(RTUINT128U)
7019 || pVar->cbVar == sizeof(RTUINT256U));
7020 Assert(!pVar->fRegAcquired);
7021
7022 uint8_t idxReg = pVar->idxReg;
7023 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7024 {
7025 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7026 && pVar->enmKind < kIemNativeVarKind_End);
7027 pVar->fRegAcquired = true;
7028 return idxReg;
7029 }
7030
7031 /*
7032 * If the kind of variable has not yet been set, default to 'stack'.
7033 */
7034 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7035 && pVar->enmKind < kIemNativeVarKind_End);
7036 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7037 iemNativeVarSetKindToStack(pReNative, idxVar);
7038
7039 /*
7040 * We have to allocate a register for the variable, even if it's a stack one,
7041 * as we don't know if there are modifications being made to it before it's
7042 * finalized (todo: analyze and insert hints about that?).
7043 *
7044 * If we can, we try to get the correct register for argument variables. This
7045 * is assuming that most argument variables are fetched as close as possible
7046 * to the actual call, so that there aren't any interfering hidden calls
7047 * (memory accesses, etc) in between.
7048 *
7049 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7050 * argument registers that will be used by this MC block will be allocated
7051 * here, and we always prefer non-volatile registers to avoid needing to
7052 * spill stuff for internal calls.
7053 */
7054 /** @todo Detect too early argument value fetches in the python script and
7055 * warn about hidden calls causing less optimal code to be generated. */
7056
7057 uint8_t const uArgNo = pVar->uArgNo;
7058 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7059
7060 /* SIMD is a bit simpler for now because there is no support for arguments. */
7061 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7062 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7063 {
7064 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7065 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7066 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7067 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7068 & fNotArgsMask;
7069 if (fRegs)
7070 {
7071 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7072 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7073 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7074 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7075 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7076 }
7077 else
7078 {
7079 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7080 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7081 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7082 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7083 }
7084 }
7085 else
7086 {
7087 idxReg = idxRegPref;
7088 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7089 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7090 }
7091 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7092
7093 pVar->fSimdReg = true;
7094 pVar->idxReg = idxReg;
7095
7096 /*
7097 * Load it off the stack if we've got a stack slot.
7098 */
7099 uint8_t const idxStackSlot = pVar->idxStackSlot;
7100 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7101 {
7102 Assert(fInitialized);
7103 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7104 switch (pVar->cbVar)
7105 {
7106 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7107 default: AssertFailed(); RT_FALL_THRU();
7108 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7109 }
7110 }
7111 else
7112 {
7113 Assert(idxStackSlot == UINT8_MAX);
7114 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7115 }
7116 pVar->fRegAcquired = true;
7117 return idxReg;
7118}
7119#endif
7120
7121
7122/**
7123 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7124 * guest register.
7125 *
7126 * This function makes sure there is a register for it and sets it to be the
7127 * current shadow copy of @a enmGstReg.
7128 *
7129 * @returns The host register number.
7130 * @param pReNative The recompiler state.
7131 * @param idxVar The variable.
7132 * @param enmGstReg The guest register this variable will be written to
7133 * after this call.
7134 * @param poff Pointer to the instruction buffer offset.
7135 * In case a register needs to be freed up or if the
7136 * variable content needs to be loaded off the stack.
7137 *
7138 * @note We DO NOT expect @a idxVar to be an argument variable,
7139 * because this function is only used in the commit stage of an
7140 * instruction.
7141 */
7142DECL_HIDDEN_THROW(uint8_t)
7143iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7144{
7145 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7146 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7147 Assert(!pVar->fRegAcquired);
7148 AssertMsgStmt( pVar->cbVar <= 8
7149 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7150 || pVar->enmKind == kIemNativeVarKind_Stack),
7151 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7152 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7153 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7154
7155 /*
7156 * This shouldn't ever be used for arguments, unless it's in a weird else
7157 * branch that doesn't do any calling and even then it's questionable.
7158 *
7159 * However, in case someone writes crazy wrong MC code and does register
7160 * updates before making calls, just use the regular register allocator to
7161 * ensure we get a register suitable for the intended argument number.
7162 */
7163 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7164
7165 /*
7166 * If there is already a register for the variable, we transfer/set the
7167 * guest shadow copy assignment to it.
7168 */
7169 uint8_t idxReg = pVar->idxReg;
7170 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7171 {
7172#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7173 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7174 {
7175# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7176 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7177 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7178# endif
7179 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7180 }
7181#endif
7182
7183 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7184 {
7185 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7186 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7187 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7188 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7189 }
7190 else
7191 {
7192 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7193 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7194 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7195 }
7196 /** @todo figure this one out. We need some way of making sure the register isn't
7197 * modified after this point, just in case we start writing crappy MC code. */
7198 pVar->enmGstReg = enmGstReg;
7199 pVar->fRegAcquired = true;
7200 return idxReg;
7201 }
7202 Assert(pVar->uArgNo == UINT8_MAX);
7203
7204 /*
7205 * Because this is supposed to be the commit stage, we just tag along with the
7206 * temporary register allocator and upgrade it to a variable register.
7207 */
7208 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7209 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7210 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7211 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7212 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7213 pVar->idxReg = idxReg;
7214
7215 /*
7216 * Now we need to load the register value.
7217 */
7218 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7219 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7220 else
7221 {
7222 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7223 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7224 switch (pVar->cbVar)
7225 {
7226 case sizeof(uint64_t):
7227 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7228 break;
7229 case sizeof(uint32_t):
7230 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7231 break;
7232 case sizeof(uint16_t):
7233 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7234 break;
7235 case sizeof(uint8_t):
7236 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7237 break;
7238 default:
7239 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7240 }
7241 }
7242
7243 pVar->fRegAcquired = true;
7244 return idxReg;
7245}
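
/* Illustrative sketch (not part of the original source): commit-stage usage of the
   helper above.  kIemNativeGstReg_GprFirst is used purely as an example guest register;
   the store back to CPUMCTX is only indicated by a comment because the concrete store
   emitter depends on the register and width being committed:

        uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar,
                                                                      kIemNativeGstReg_GprFirst, &off);
        // ...emit the store of idxReg to the corresponding CPUMCTX field here...
        iemNativeVarRegisterRelease(pReNative, idxVar);
 */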
7246
7247
7248/**
7249 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7250 *
7251 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7252 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7253 * requirement of flushing anything in volatile host registers when making a
7254 * call.
7255 *
7256 * @returns New @a off value.
7257 * @param pReNative The recompiler state.
7258 * @param off The code buffer position.
7259 * @param fHstRegsNotToSave Set of registers not to save & restore.
7260 */
7261DECL_HIDDEN_THROW(uint32_t)
7262iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7263{
7264 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7265 if (fHstRegs)
7266 {
7267 do
7268 {
7269 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7270 fHstRegs &= ~RT_BIT_32(idxHstReg);
7271
7272 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7273 {
7274 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7275 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7276 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7277 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7278 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7279 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7280 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7281 {
7282 case kIemNativeVarKind_Stack:
7283 {
7284 /* Temporarily spill the variable register. */
7285 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7286 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7287 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7288 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7289 continue;
7290 }
7291
7292 case kIemNativeVarKind_Immediate:
7293 case kIemNativeVarKind_VarRef:
7294 case kIemNativeVarKind_GstRegRef:
7295 /* It is weird to have any of these loaded at this point. */
7296 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7297 continue;
7298
7299 case kIemNativeVarKind_End:
7300 case kIemNativeVarKind_Invalid:
7301 break;
7302 }
7303 AssertFailed();
7304 }
7305 else
7306 {
7307 /*
7308 * Allocate a temporary stack slot and spill the register to it.
7309 */
7310 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7311 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7312 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7313 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7314 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7315 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7316 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7317 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7318 }
7319 } while (fHstRegs);
7320 }
7321#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7322
7323 /*
7324 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
7325 * allocated, which would be more difficult due to them spanning multiple stack slots and
7326 * having different sizes (besides, we only have a limited number of slots at the moment).
7327 *
7328 * However the shadows need to be flushed out as the guest SIMD register might get corrupted by
7329 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7330 */
7331 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7332
7333 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7334 if (fHstRegs)
7335 {
7336 do
7337 {
7338 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7339 fHstRegs &= ~RT_BIT_32(idxHstReg);
7340
7341 /* Fixed reserved and temporary registers don't need saving. */
7342 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7343 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7344 continue;
7345
7346 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7347
7348 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7349 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7350 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7351 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7352 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7353 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7354 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7355 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7356 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7357 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7358 {
7359 case kIemNativeVarKind_Stack:
7360 {
7361 /* Temporarily spill the variable register. */
7362 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7363 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7364 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7365 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7366 if (cbVar == sizeof(RTUINT128U))
7367 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7368 else
7369 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7370 continue;
7371 }
7372
7373 case kIemNativeVarKind_Immediate:
7374 case kIemNativeVarKind_VarRef:
7375 case kIemNativeVarKind_GstRegRef:
7376 /* It is weird to have any of these loaded at this point. */
7377 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7378 continue;
7379
7380 case kIemNativeVarKind_End:
7381 case kIemNativeVarKind_Invalid:
7382 break;
7383 }
7384 AssertFailed();
7385 } while (fHstRegs);
7386 }
7387#endif
7388 return off;
7389}
7390
7391
7392/**
7393 * Emit code to restore volatile registers after a call to a helper.
7394 *
7395 * @returns New @a off value.
7396 * @param pReNative The recompiler state.
7397 * @param off The code buffer position.
7398 * @param fHstRegsNotToSave Set of registers not to save & restore.
7399 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7400 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7401 */
7402DECL_HIDDEN_THROW(uint32_t)
7403iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7404{
7405 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7406 if (fHstRegs)
7407 {
7408 do
7409 {
7410 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7411 fHstRegs &= ~RT_BIT_32(idxHstReg);
7412
7413 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7414 {
7415 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7416 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7417 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7418 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7419 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7420 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7421 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7422 {
7423 case kIemNativeVarKind_Stack:
7424 {
7425 /* Unspill the variable register. */
7426 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7427 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7428 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7429 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7430 continue;
7431 }
7432
7433 case kIemNativeVarKind_Immediate:
7434 case kIemNativeVarKind_VarRef:
7435 case kIemNativeVarKind_GstRegRef:
7436 /* It is weird to have any of these loaded at this point. */
7437 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7438 continue;
7439
7440 case kIemNativeVarKind_End:
7441 case kIemNativeVarKind_Invalid:
7442 break;
7443 }
7444 AssertFailed();
7445 }
7446 else
7447 {
7448 /*
7449 * Restore from temporary stack slot.
7450 */
7451 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7452 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7453 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7454 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7455
7456 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7457 }
7458 } while (fHstRegs);
7459 }
7460#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7461 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7462 if (fHstRegs)
7463 {
7464 do
7465 {
7466 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7467 fHstRegs &= ~RT_BIT_32(idxHstReg);
7468
7469 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7470 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7471 continue;
7472 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7473
7474 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7475 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7476 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7477 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7478 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7479 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7480 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7481 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7482 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7483 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7484 {
7485 case kIemNativeVarKind_Stack:
7486 {
7487 /* Unspill the variable register. */
7488 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7489 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7490 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7491 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7492
7493 if (cbVar == sizeof(RTUINT128U))
7494 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7495 else
7496 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7497 continue;
7498 }
7499
7500 case kIemNativeVarKind_Immediate:
7501 case kIemNativeVarKind_VarRef:
7502 case kIemNativeVarKind_GstRegRef:
7503 /* It is weird to have any of these loaded at this point. */
7504 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7505 continue;
7506
7507 case kIemNativeVarKind_End:
7508 case kIemNativeVarKind_Invalid:
7509 break;
7510 }
7511 AssertFailed();
7512 } while (fHstRegs);
7513 }
7514#endif
7515 return off;
7516}
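
/* Illustrative sketch (not part of the original source): how the save/restore pair
   above is meant to bracket a helper (TLB miss) call; the argument loading and call
   emission are only indicated by comments since they are done by the TLB lookup code:

        off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
        // ...load the helper arguments and emit the call to the helper here...
        off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
        // Passing the same fHstRegsNotToSave mask to both calls keeps the set of
        // spilled and unspilled registers symmetric around the call.
 */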
7517
7518
7519/**
7520 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7521 *
7522 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7523 *
7524 * ASSUMES that @a idxVar is valid and unpacked.
7525 */
7526DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7527{
7528 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7529 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7530 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7531 {
7532 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7533 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7534 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7535 Assert(cSlots > 0);
7536 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7537 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7538 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7539 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7540 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7541 }
7542 else
7543 Assert(idxStackSlot == UINT8_MAX);
7544}
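
/* Illustrative sketch (not part of the original source): the slot arithmetic above for
   a 256-bit SIMD variable that occupies four 64-bit stack slots starting at slot 4:

        uint8_t const  cbVar      = sizeof(RTUINT256U);                                  // 32 bytes
        uint8_t const  cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);   // 4 slots
        uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);                  // 0xf
        // bmStack &= ~(fAllocMask << idxStackSlot) with idxStackSlot = 4 clears bits
        // 4 through 7, freeing all four slots in one go.
 */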
7545
7546
7547/**
7548 * Worker that frees a single variable.
7549 *
7550 * ASSUMES that @a idxVar is valid and unpacked.
7551 */
7552DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7553{
7554 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7555 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7556 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7557
7558 /* Free the host register first if any assigned. */
7559 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7560#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7561 if ( idxHstReg != UINT8_MAX
7562 && pReNative->Core.aVars[idxVar].fSimdReg)
7563 {
7564 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7565 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7566 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7567 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7568 }
7569 else
7570#endif
7571 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7572 {
7573 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7574 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7575 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7576 }
7577
7578 /* Free argument mapping. */
7579 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7580 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7581 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7582
7583 /* Free the stack slots. */
7584 iemNativeVarFreeStackSlots(pReNative, idxVar);
7585
7586 /* Free the actual variable. */
7587 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7588 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7589}
7590
7591
7592/**
7593 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7594 */
7595DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7596{
7597 while (bmVars != 0)
7598 {
7599 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7600 bmVars &= ~RT_BIT_32(idxVar);
7601
7602#if 1 /** @todo optimize by simplifying this later... */
7603 iemNativeVarFreeOneWorker(pReNative, idxVar);
7604#else
7605 /* Only need to free the host register, the rest is done as bulk updates below. */
7606 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7607 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7608 {
7609 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7610 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7611 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7612 }
7613#endif
7614 }
7615#if 0 /** @todo optimize by simplifying this later... */
7616 pReNative->Core.bmVars = 0;
7617 pReNative->Core.bmStack = 0;
7618 pReNative->Core.u64ArgVars = UINT64_MAX;
7619#endif
7620}
7621
7622
7623
7624/*********************************************************************************************************************************
7625* Emitters for IEM_MC_CALL_CIMPL_XXX *
7626*********************************************************************************************************************************/
7627
7628/**
7629 * Emits code to load a reference to the given guest register into @a idxGprDst.
7630 */
7631DECL_HIDDEN_THROW(uint32_t)
7632iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7633 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7634{
7635#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7636 /** @todo If we're ever going to allow referencing the RIP register we need to update the guest value here. */
7637#endif
7638
7639 /*
7640 * Get the offset relative to the CPUMCTX structure.
7641 */
7642 uint32_t offCpumCtx;
7643 switch (enmClass)
7644 {
7645 case kIemNativeGstRegRef_Gpr:
7646 Assert(idxRegInClass < 16);
7647 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7648 break;
7649
7650 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7651 Assert(idxRegInClass < 4);
7652 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7653 break;
7654
7655 case kIemNativeGstRegRef_EFlags:
7656 Assert(idxRegInClass == 0);
7657 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7658 break;
7659
7660 case kIemNativeGstRegRef_MxCsr:
7661 Assert(idxRegInClass == 0);
7662 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7663 break;
7664
7665 case kIemNativeGstRegRef_FpuReg:
7666 Assert(idxRegInClass < 8);
7667 AssertFailed(); /** @todo what kind of indexing? */
7668 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7669 break;
7670
7671 case kIemNativeGstRegRef_MReg:
7672 Assert(idxRegInClass < 8);
7673 AssertFailed(); /** @todo what kind of indexing? */
7674 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7675 break;
7676
7677 case kIemNativeGstRegRef_XReg:
7678 Assert(idxRegInClass < 16);
7679 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7680 break;
7681
7682 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7683 Assert(idxRegInClass == 0);
7684 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7685 break;
7686
7687 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7688 Assert(idxRegInClass == 0);
7689 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7690 break;
7691
7692 default:
7693 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7694 }
7695
7696 /*
7697 * Load the value into the destination register.
7698 */
7699#ifdef RT_ARCH_AMD64
7700 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7701
7702#elif defined(RT_ARCH_ARM64)
7703 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7704 Assert(offCpumCtx < 4096);
7705 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7706
7707#else
7708# error "Port me!"
7709#endif
7710
7711 return off;
7712}
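
/* Illustrative sketch (not part of the original source): what the helper above computes
   for a general purpose register reference, e.g. kIemNativeGstRegRef_Gpr with
   idxRegInClass = 3:

        uint32_t const offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]);
        // AMD64: lea idxGprDst, [pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx]
        // ARM64: add idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, #offCpumCtx
 */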
7713
7714
7715/**
7716 * Common code for CIMPL and AIMPL calls.
7717 *
7718 * These are calls that use argument variables and such. They should not be
7719 * confused with internal calls required to implement an MC operation,
7720 * like a TLB load and similar.
7721 *
7722 * Upon return all that is left to do is to load any hidden arguments and
7723 * perform the call. All argument variables are freed.
7724 *
7725 * @returns New code buffer offset; throws VBox status code on error.
7726 * @param pReNative The native recompile state.
7727 * @param off The code buffer offset.
7728 * @param cArgs The total number of arguments (includes hidden
7729 * count).
7730 * @param cHiddenArgs The number of hidden arguments. The hidden
7731 * arguments must not have any variable declared for
7732 * them, whereas all the regular arguments must
7733 * (tstIEMCheckMc ensures this).
7734 * @param fFlushPendingWrites Flag whether to flush pending writes (default true);
7735 * pending writes in call-volatile registers are still flushed when false.
7736 */
7737DECL_HIDDEN_THROW(uint32_t)
7738iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
7739 bool fFlushPendingWrites /*= true*/)
7740{
7741#ifdef VBOX_STRICT
7742 /*
7743 * Assert sanity.
7744 */
7745 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7746 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7747 for (unsigned i = 0; i < cHiddenArgs; i++)
7748 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7749 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7750 {
7751 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7752 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7753 }
7754 iemNativeRegAssertSanity(pReNative);
7755#endif
7756
7757 /* We don't know what the called function makes use of, so flush any pending register writes. */
7758 RT_NOREF(fFlushPendingWrites);
7759#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7760 if (fFlushPendingWrites)
7761#endif
7762 off = iemNativeRegFlushPendingWrites(pReNative, off);
7763
7764 /*
7765 * Before we do anything else, go over variables that are referenced and
7766 * make sure they are not in a register.
7767 */
7768 uint32_t bmVars = pReNative->Core.bmVars;
7769 if (bmVars)
7770 {
7771 do
7772 {
7773 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7774 bmVars &= ~RT_BIT_32(idxVar);
7775
7776 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7777 {
7778 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7779#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7780 if ( idxRegOld != UINT8_MAX
7781 && pReNative->Core.aVars[idxVar].fSimdReg)
7782 {
7783 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7784 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
7785
7786 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7787 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7788 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7789 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7790 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
7791 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7792 else
7793 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7794
7795 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
7796 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
7797
7798 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7799 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
7800 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7801 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
7802 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
7803 }
7804 else
7805#endif
7806 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7807 {
7808 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7809 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7810 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7811 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7812 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7813
7814 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7815 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7816 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7817 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7818 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7819 }
7820 }
7821 } while (bmVars != 0);
7822#if 0 //def VBOX_STRICT
7823 iemNativeRegAssertSanity(pReNative);
7824#endif
7825 }
7826
7827 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7828
7829#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7830 /*
7831 * At the very first step go over the host registers that will be used for arguments
7832 * don't shadow anything which needs writing back first.
7833 */
7834 for (uint32_t i = 0; i < cRegArgs; i++)
7835 {
7836 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7837
7838 /* Writeback any dirty guest shadows before using this register. */
7839 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
7840 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
7841 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
7842 }
7843#endif
7844
7845 /*
7846 * First, go over the host registers that will be used for arguments and make
7847 * sure they either hold the desired argument or are free.
7848 */
7849 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7850 {
7851 for (uint32_t i = 0; i < cRegArgs; i++)
7852 {
7853 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7854 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7855 {
7856 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7857 {
7858 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7859 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7860 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7861 Assert(pVar->idxReg == idxArgReg);
7862 uint8_t const uArgNo = pVar->uArgNo;
7863 if (uArgNo == i)
7864 { /* perfect */ }
7865 /* The variable allocator logic should make sure this is impossible,
7866 except for when the return register is used as a parameter (ARM,
7867 but not x86). */
7868#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7869 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7870 {
7871# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7872# error "Implement this"
7873# endif
7874 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7875 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7876 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7877 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7878 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7879 }
7880#endif
7881 else
7882 {
7883 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7884
7885 if (pVar->enmKind == kIemNativeVarKind_Stack)
7886 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7887 else
7888 {
7889 /* just free it, can be reloaded if used again */
7890 pVar->idxReg = UINT8_MAX;
7891 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7892 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7893 }
7894 }
7895 }
7896 else
7897 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7898 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7899 }
7900 }
7901#if 0 //def VBOX_STRICT
7902 iemNativeRegAssertSanity(pReNative);
7903#endif
7904 }
7905
7906 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7907
7908#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7909 /*
7910 * If there are any stack arguments, make sure they are in their place as well.
7911 *
7912 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7913 * the caller) will be loading it later and it must be free (see the first loop).
7914 */
7915 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7916 {
7917 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7918 {
7919 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7920 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7921 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7922 {
7923 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7924 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7925 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7926 pVar->idxReg = UINT8_MAX;
7927 }
7928 else
7929 {
7930 /* Use ARG0 as temp for stuff we need registers for. */
7931 switch (pVar->enmKind)
7932 {
7933 case kIemNativeVarKind_Stack:
7934 {
7935 uint8_t const idxStackSlot = pVar->idxStackSlot;
7936 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7937 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7938 iemNativeStackCalcBpDisp(idxStackSlot));
7939 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7940 continue;
7941 }
7942
7943 case kIemNativeVarKind_Immediate:
7944 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7945 continue;
7946
7947 case kIemNativeVarKind_VarRef:
7948 {
7949 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7950 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7951 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7952 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7953 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7954# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7955 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
7956 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
7957 if ( fSimdReg
7958 && idxRegOther != UINT8_MAX)
7959 {
7960 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7961 if (cbVar == sizeof(RTUINT128U))
7962 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
7963 else
7964 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
7965 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7966 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7967 }
7968 else
7969# endif
7970 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7971 {
7972 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7973 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7974 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7975 }
7976 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7977 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7978 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7979 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7980 continue;
7981 }
7982
7983 case kIemNativeVarKind_GstRegRef:
7984 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7985 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
7986 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7987 continue;
7988
7989 case kIemNativeVarKind_Invalid:
7990 case kIemNativeVarKind_End:
7991 break;
7992 }
7993 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7994 }
7995 }
7996# if 0 //def VBOX_STRICT
7997 iemNativeRegAssertSanity(pReNative);
7998# endif
7999 }
8000#else
8001 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8002#endif
8003
8004 /*
8005 * Make sure the argument variables are loaded into their respective registers.
8006 *
8007 * We can optimize this by ASSUMING that any register allocations are for
8008 * registers that have already been loaded and are ready. The previous step
8009 * saw to that.
8010 */
8011 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8012 {
8013 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8014 {
8015 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8016 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8017 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8018 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8019 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8020 else
8021 {
8022 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8023 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8024 {
8025 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8026 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8027 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8028 | RT_BIT_32(idxArgReg);
8029 pVar->idxReg = idxArgReg;
8030 }
8031 else
8032 {
8033 /* Use ARG0 as temp for stuff we need registers for. */
8034 switch (pVar->enmKind)
8035 {
8036 case kIemNativeVarKind_Stack:
8037 {
8038 uint8_t const idxStackSlot = pVar->idxStackSlot;
8039 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8040 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8041 continue;
8042 }
8043
8044 case kIemNativeVarKind_Immediate:
8045 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8046 continue;
8047
8048 case kIemNativeVarKind_VarRef:
8049 {
8050 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8051 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8052 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8053 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8054 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8055 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8056#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8057 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8058 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8059 if ( fSimdReg
8060 && idxRegOther != UINT8_MAX)
8061 {
8062 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8063 if (cbVar == sizeof(RTUINT128U))
8064 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8065 else
8066 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8067 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8068 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8069 }
8070 else
8071#endif
8072 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8073 {
8074 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8075 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8076 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8077 }
8078 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8079 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8080 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8081 continue;
8082 }
8083
8084 case kIemNativeVarKind_GstRegRef:
8085 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8086 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8087 continue;
8088
8089 case kIemNativeVarKind_Invalid:
8090 case kIemNativeVarKind_End:
8091 break;
8092 }
8093 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8094 }
8095 }
8096 }
8097#if 0 //def VBOX_STRICT
8098 iemNativeRegAssertSanity(pReNative);
8099#endif
8100 }
8101#ifdef VBOX_STRICT
8102 else
8103 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8104 {
8105 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8106 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8107 }
8108#endif
8109
8110 /*
8111 * Free all argument variables (simplified).
8112 * Their lifetime always expires with the call they are for.
8113 */
8114 /** @todo Make the python script check that arguments aren't used after
8115 * IEM_MC_CALL_XXXX. */
8116 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8117 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8118 * an argument value. There is also some FPU stuff. */
8119 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8120 {
8121 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8122 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8123
8124 /* no need to free registers: */
8125 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8126 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8127 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8128 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8129 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8130 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8131
8132 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8133 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8134 iemNativeVarFreeStackSlots(pReNative, idxVar);
8135 }
8136 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8137
8138 /*
8139 * Flush volatile registers as we make the call.
8140 */
8141 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8142
8143 return off;
8144}
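
/* Illustrative sketch (not part of the original source): rough shape of how an
   IEM_MC_CALL_XXX emitter is expected to use the common worker above; cArgsTotal
   includes the hidden arguments, and loading those plus emitting the actual call
   differ per call type and are therefore only indicated by comments:

        off = iemNativeEmitCallCommon(pReNative, off, cArgsTotal, cHiddenArgs);
        // ...load the hidden arguments (e.g. pVCpu) into the first call registers...
        // ...emit the call to the C helper and process its return status/value...
 */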
8145
8146
8147
8148/*********************************************************************************************************************************
8149* TLB Lookup. *
8150*********************************************************************************************************************************/
8151
8152/**
8153 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8154 */
8155DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8156{
8157 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8158 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8159 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8160 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8161
8162 /* Do the lookup manually. */
8163 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8164 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8165 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8166 if (RT_LIKELY(pTlbe->uTag == uTag))
8167 {
8168 /*
8169 * Check TLB page table level access flags.
8170 */
8171 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8172 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
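        /* I.e. with IEMTLBE_F_PT_NO_USER == 4, (CPL + 1) & 4 is 4 only for CPL 3 (3 + 1 = 4)
           and 0 for CPL 0 thru 2, so the NO_USER page-table restriction is only enforced
           for user-mode (ring-3) accesses. */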
8173 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8174 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8175 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8176 | IEMTLBE_F_PG_UNASSIGNED
8177 | IEMTLBE_F_PT_NO_ACCESSED
8178 | fNoWriteNoDirty | fNoUser);
8179 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8180 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8181 {
8182 /*
8183 * Return the address.
8184 */
8185 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8186 if ((uintptr_t)pbAddr == uResult)
8187 return;
8188 RT_NOREF(cbMem);
8189 AssertFailed();
8190 }
8191 else
8192 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8193 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8194 }
8195 else
8196 AssertFailed();
8197 RT_BREAKPOINT();
8198}
8199
8200/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8201
8202
8203
8204/*********************************************************************************************************************************
8205* Recompiler Core. *
8206*********************************************************************************************************************************/
8207
8208/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8209static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8210{
8211 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8212 pDis->cbCachedInstr += cbMaxRead;
8213 RT_NOREF(cbMinRead);
8214 return VERR_NO_DATA;
8215}
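/* Note: the guest opcode bytes are handed to the disassembler up front via
   DISInstrWithPrefetchedBytes below, so this callback is presumably only invoked when the
   disassembler asks for bytes beyond the prefetched buffer; it zero-fills the request and
   returns VERR_NO_DATA so truncated instructions fail cleanly rather than reading garbage. */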
8216
8217
8218DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8219{
8220 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8221 {
8222#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8223 ENTRY(fLocalForcedActions),
8224 ENTRY(iem.s.rcPassUp),
8225 ENTRY(iem.s.fExec),
8226 ENTRY(iem.s.pbInstrBuf),
8227 ENTRY(iem.s.uInstrBufPc),
8228 ENTRY(iem.s.GCPhysInstrBuf),
8229 ENTRY(iem.s.cbInstrBufTotal),
8230 ENTRY(iem.s.idxTbCurInstr),
8231#ifdef VBOX_WITH_STATISTICS
8232 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8233 ENTRY(iem.s.StatNativeTlbHitsForStore),
8234 ENTRY(iem.s.StatNativeTlbHitsForStack),
8235 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8236 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8237 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8238 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8239 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8240#endif
8241 ENTRY(iem.s.DataTlb.uTlbRevision),
8242 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8243 ENTRY(iem.s.DataTlb.cTlbHits),
8244 ENTRY(iem.s.DataTlb.aEntries),
8245 ENTRY(iem.s.CodeTlb.uTlbRevision),
8246 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8247 ENTRY(iem.s.CodeTlb.cTlbHits),
8248 ENTRY(iem.s.CodeTlb.aEntries),
8249 ENTRY(pVMR3),
8250 ENTRY(cpum.GstCtx.rax),
8251 ENTRY(cpum.GstCtx.ah),
8252 ENTRY(cpum.GstCtx.rcx),
8253 ENTRY(cpum.GstCtx.ch),
8254 ENTRY(cpum.GstCtx.rdx),
8255 ENTRY(cpum.GstCtx.dh),
8256 ENTRY(cpum.GstCtx.rbx),
8257 ENTRY(cpum.GstCtx.bh),
8258 ENTRY(cpum.GstCtx.rsp),
8259 ENTRY(cpum.GstCtx.rbp),
8260 ENTRY(cpum.GstCtx.rsi),
8261 ENTRY(cpum.GstCtx.rdi),
8262 ENTRY(cpum.GstCtx.r8),
8263 ENTRY(cpum.GstCtx.r9),
8264 ENTRY(cpum.GstCtx.r10),
8265 ENTRY(cpum.GstCtx.r11),
8266 ENTRY(cpum.GstCtx.r12),
8267 ENTRY(cpum.GstCtx.r13),
8268 ENTRY(cpum.GstCtx.r14),
8269 ENTRY(cpum.GstCtx.r15),
8270 ENTRY(cpum.GstCtx.es.Sel),
8271 ENTRY(cpum.GstCtx.es.u64Base),
8272 ENTRY(cpum.GstCtx.es.u32Limit),
8273 ENTRY(cpum.GstCtx.es.Attr),
8274 ENTRY(cpum.GstCtx.cs.Sel),
8275 ENTRY(cpum.GstCtx.cs.u64Base),
8276 ENTRY(cpum.GstCtx.cs.u32Limit),
8277 ENTRY(cpum.GstCtx.cs.Attr),
8278 ENTRY(cpum.GstCtx.ss.Sel),
8279 ENTRY(cpum.GstCtx.ss.u64Base),
8280 ENTRY(cpum.GstCtx.ss.u32Limit),
8281 ENTRY(cpum.GstCtx.ss.Attr),
8282 ENTRY(cpum.GstCtx.ds.Sel),
8283 ENTRY(cpum.GstCtx.ds.u64Base),
8284 ENTRY(cpum.GstCtx.ds.u32Limit),
8285 ENTRY(cpum.GstCtx.ds.Attr),
8286 ENTRY(cpum.GstCtx.fs.Sel),
8287 ENTRY(cpum.GstCtx.fs.u64Base),
8288 ENTRY(cpum.GstCtx.fs.u32Limit),
8289 ENTRY(cpum.GstCtx.fs.Attr),
8290 ENTRY(cpum.GstCtx.gs.Sel),
8291 ENTRY(cpum.GstCtx.gs.u64Base),
8292 ENTRY(cpum.GstCtx.gs.u32Limit),
8293 ENTRY(cpum.GstCtx.gs.Attr),
8294 ENTRY(cpum.GstCtx.rip),
8295 ENTRY(cpum.GstCtx.eflags),
8296 ENTRY(cpum.GstCtx.uRipInhibitInt),
8297 ENTRY(cpum.GstCtx.cr0),
8298 ENTRY(cpum.GstCtx.cr4),
8299 ENTRY(cpum.GstCtx.aXcr[0]),
8300 ENTRY(cpum.GstCtx.aXcr[1]),
8301#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8302 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8303 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8304 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8305 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8306 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8307 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8308 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8309 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8310 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8311 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8312 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8313 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8314 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8315 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8316 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8317 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8318 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8319 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8320 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8321 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8322 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8323 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8324 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8325 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8326 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8327 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8328 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8329 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8330 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8331 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8332 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8333 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8334#endif
8335#undef ENTRY
8336 };
8337#ifdef VBOX_STRICT
8338 static bool s_fOrderChecked = false;
8339 if (!s_fOrderChecked)
8340 {
8341 s_fOrderChecked = true;
8342 uint32_t offPrev = s_aMembers[0].off;
8343 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8344 {
8345 Assert(s_aMembers[i].off > offPrev);
8346 offPrev = s_aMembers[i].off;
8347 }
8348 }
8349#endif
8350
8351 /*
8352 * Binary lookup.
8353 */
8354 unsigned iStart = 0;
8355 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8356 for (;;)
8357 {
8358 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8359 uint32_t const offCur = s_aMembers[iCur].off;
8360 if (off < offCur)
8361 {
8362 if (iCur != iStart)
8363 iEnd = iCur;
8364 else
8365 break;
8366 }
8367 else if (off > offCur)
8368 {
8369 if (iCur + 1 < iEnd)
8370 iStart = iCur + 1;
8371 else
8372 break;
8373 }
8374 else
8375 return s_aMembers[iCur].pszName;
8376 }
8377#ifdef VBOX_WITH_STATISTICS
8378 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8379 return "iem.s.acThreadedFuncStats[iFn]";
8380#endif
8381 return NULL;
8382}
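/* Usage sketch (illustrative only): the annotation code below feeds this the displacement of
   a memory operand based on the fixed pVCpu register, e.g.
       iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip))
   returns "cpum.GstCtx.rip", while an offset matching no table entry yields NULL (aside from
   the threaded-function statistics range handled at the end). */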
8383
8384
8385DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8386{
8387 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8388#if defined(RT_ARCH_AMD64)
8389 static const char * const a_apszMarkers[] =
8390 {
8391 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8392 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8393 };
8394#endif
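    /* As decoded further down: on AMD64 a marker emitted by iemNativeEmitMarker is a 7 byte
       NOP carrying a 32-bit payload at byte offset 3.  If its high word is a valid threaded
       function index, the low 15 bits give the call number and bit 15 the recompiled flag;
       otherwise the value (apparently with bit 31 set) indexes a_apszMarkers above. */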
8395
8396 char szDisBuf[512];
8397 DISSTATE Dis;
8398 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8399 uint32_t const cNative = pTb->Native.cInstructions;
8400 uint32_t offNative = 0;
8401#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8402 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8403#endif
8404 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8405 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8406 : DISCPUMODE_64BIT;
8407#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8408 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8409#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8410 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8411#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8412# error "Port me"
8413#else
8414 csh hDisasm = ~(size_t)0;
8415# if defined(RT_ARCH_AMD64)
8416 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8417# elif defined(RT_ARCH_ARM64)
8418 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8419# else
8420# error "Port me"
8421# endif
8422 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8423
8424 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8425 //Assert(rcCs == CS_ERR_OK);
8426#endif
8427
8428 /*
8429 * Print TB info.
8430 */
8431 pHlp->pfnPrintf(pHlp,
8432 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8433 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8434 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8435 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8436#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8437 if (pDbgInfo && pDbgInfo->cEntries > 1)
8438 {
8439 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8440
8441 /*
8442 * This disassembly is driven by the debug info which follows the native
8443         * code and indicates where the next guest instruction starts, where
8444         * labels are and other such things.
8445 */
8446 uint32_t idxThreadedCall = 0;
8447 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8448 uint8_t idxRange = UINT8_MAX;
8449 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8450 uint32_t offRange = 0;
8451 uint32_t offOpcodes = 0;
8452 uint32_t const cbOpcodes = pTb->cbOpcodes;
8453 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8454 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8455 uint32_t iDbgEntry = 1;
8456 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8457
8458 while (offNative < cNative)
8459 {
8460 /* If we're at or have passed the point where the next chunk of debug
8461 info starts, process it. */
8462 if (offDbgNativeNext <= offNative)
8463 {
8464 offDbgNativeNext = UINT32_MAX;
8465 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8466 {
8467 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8468 {
8469 case kIemTbDbgEntryType_GuestInstruction:
8470 {
8471 /* Did the exec flag change? */
8472 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8473 {
8474 pHlp->pfnPrintf(pHlp,
8475 " fExec change %#08x -> %#08x %s\n",
8476 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8477 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8478 szDisBuf, sizeof(szDisBuf)));
8479 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8480 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8481 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8482 : DISCPUMODE_64BIT;
8483 }
8484
8485                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
8486 where the compilation was aborted before the opcode was recorded and the actual
8487 instruction was translated to a threaded call. This may happen when we run out
8488 of ranges, or when some complicated interrupts/FFs are found to be pending or
8489 similar. So, we just deal with it here rather than in the compiler code as it
8490 is a lot simpler to do here. */
8491 if ( idxRange == UINT8_MAX
8492 || idxRange >= cRanges
8493 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8494 {
8495 idxRange += 1;
8496 if (idxRange < cRanges)
8497 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8498 else
8499 continue;
8500 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8501 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8502 + (pTb->aRanges[idxRange].idxPhysPage == 0
8503 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8504 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8505 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8506 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8507 pTb->aRanges[idxRange].idxPhysPage);
8508 GCPhysPc += offRange;
8509 }
8510
8511 /* Disassemble the instruction. */
8512 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8513 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8514 uint32_t cbInstr = 1;
8515 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8516 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8517 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8518 if (RT_SUCCESS(rc))
8519 {
8520 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8521 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8522 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8523 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8524
8525 static unsigned const s_offMarker = 55;
8526 static char const s_szMarker[] = " ; <--- guest";
8527 if (cch < s_offMarker)
8528 {
8529 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8530 cch = s_offMarker;
8531 }
8532 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8533 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8534
8535 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8536 }
8537 else
8538 {
8539 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8540 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8541 cbInstr = 1;
8542 }
8543 GCPhysPc += cbInstr;
8544 offOpcodes += cbInstr;
8545 offRange += cbInstr;
8546 continue;
8547 }
8548
8549 case kIemTbDbgEntryType_ThreadedCall:
8550 pHlp->pfnPrintf(pHlp,
8551 " Call #%u to %s (%u args) - %s\n",
8552 idxThreadedCall,
8553 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8554 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8555 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8556 idxThreadedCall++;
8557 continue;
8558
8559 case kIemTbDbgEntryType_GuestRegShadowing:
8560 {
8561 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8562 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8563 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8564 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8565 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8566 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8567 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8568 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8569 else
8570 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8571 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8572 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8573 continue;
8574 }
8575
8576#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8577 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8578 {
8579 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8580 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8581 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8582 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8583 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8584 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8585 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8586 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8587 else
8588 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8589 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8590 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8591 continue;
8592 }
8593#endif
8594
8595 case kIemTbDbgEntryType_Label:
8596 {
8597 const char *pszName = "what_the_fudge";
8598 const char *pszComment = "";
8599 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8600 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8601 {
8602 case kIemNativeLabelType_Return: pszName = "Return"; break;
8603 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8604 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8605 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8606 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8607 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8608 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8609 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8610 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8611 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8612 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8613 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8614 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8615 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8616 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8617 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8618 case kIemNativeLabelType_If:
8619 pszName = "If";
8620 fNumbered = true;
8621 break;
8622 case kIemNativeLabelType_Else:
8623 pszName = "Else";
8624 fNumbered = true;
8625 pszComment = " ; regs state restored pre-if-block";
8626 break;
8627 case kIemNativeLabelType_Endif:
8628 pszName = "Endif";
8629 fNumbered = true;
8630 break;
8631 case kIemNativeLabelType_CheckIrq:
8632 pszName = "CheckIrq_CheckVM";
8633 fNumbered = true;
8634 break;
8635 case kIemNativeLabelType_TlbLookup:
8636 pszName = "TlbLookup";
8637 fNumbered = true;
8638 break;
8639 case kIemNativeLabelType_TlbMiss:
8640 pszName = "TlbMiss";
8641 fNumbered = true;
8642 break;
8643 case kIemNativeLabelType_TlbDone:
8644 pszName = "TlbDone";
8645 fNumbered = true;
8646 break;
8647 case kIemNativeLabelType_Invalid:
8648 case kIemNativeLabelType_End:
8649 break;
8650 }
8651 if (fNumbered)
8652 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8653 else
8654 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8655 continue;
8656 }
8657
8658 case kIemTbDbgEntryType_NativeOffset:
8659 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8660 Assert(offDbgNativeNext >= offNative);
8661 break;
8662
8663#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8664 case kIemTbDbgEntryType_DelayedPcUpdate:
8665 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8666 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8667 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8668 continue;
8669#endif
8670
8671#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8672 case kIemTbDbgEntryType_GuestRegDirty:
8673 {
8674 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8675 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
8676 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
8677 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
8678 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
8679 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
8680 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
8681 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
8682 pszGstReg, pszHstReg);
8683 continue;
8684 }
8685
8686 case kIemTbDbgEntryType_GuestRegWriteback:
8687                    pHlp->pfnPrintf(pHlp, "  Writing dirty %s registers (gst %#RX64)\n",
8688 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
8689 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
8690 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
8691 continue;
8692#endif
8693
8694 default:
8695 AssertFailed();
8696 }
8697 iDbgEntry++;
8698 break;
8699 }
8700 }
8701
8702 /*
8703 * Disassemble the next native instruction.
8704 */
8705 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8706# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8707 uint32_t cbInstr = sizeof(paNative[0]);
8708 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8709 if (RT_SUCCESS(rc))
8710 {
8711# if defined(RT_ARCH_AMD64)
8712 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8713 {
8714 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8715 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8716 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8717 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8718 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8719 uInfo & 0x8000 ? "recompiled" : "todo");
8720 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8721 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8722 else
8723 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8724 }
8725 else
8726# endif
8727 {
8728 const char *pszAnnotation = NULL;
8729# ifdef RT_ARCH_AMD64
8730 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8731 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8732 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8733 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8734 PCDISOPPARAM pMemOp;
8735 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8736 pMemOp = &Dis.Param1;
8737 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8738 pMemOp = &Dis.Param2;
8739 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8740 pMemOp = &Dis.Param3;
8741 else
8742 pMemOp = NULL;
8743 if ( pMemOp
8744 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8745 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8746 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8747 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8748
8749#elif defined(RT_ARCH_ARM64)
8750 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8751 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8752 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8753# else
8754# error "Port me"
8755# endif
8756 if (pszAnnotation)
8757 {
8758 static unsigned const s_offAnnotation = 55;
8759 size_t const cchAnnotation = strlen(pszAnnotation);
8760 size_t cchDis = strlen(szDisBuf);
8761 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8762 {
8763 if (cchDis < s_offAnnotation)
8764 {
8765 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8766 cchDis = s_offAnnotation;
8767 }
8768 szDisBuf[cchDis++] = ' ';
8769 szDisBuf[cchDis++] = ';';
8770 szDisBuf[cchDis++] = ' ';
8771 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8772 }
8773 }
8774 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8775 }
8776 }
8777 else
8778 {
8779# if defined(RT_ARCH_AMD64)
8780 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8781 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8782# elif defined(RT_ARCH_ARM64)
8783 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8784# else
8785# error "Port me"
8786# endif
8787 cbInstr = sizeof(paNative[0]);
8788 }
8789 offNative += cbInstr / sizeof(paNative[0]);
8790
8791# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8792 cs_insn *pInstr;
8793 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8794 (uintptr_t)pNativeCur, 1, &pInstr);
8795 if (cInstrs > 0)
8796 {
8797 Assert(cInstrs == 1);
8798 const char *pszAnnotation = NULL;
8799# if defined(RT_ARCH_ARM64)
8800 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8801 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8802 {
8803            /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8804 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8805 char *psz = strchr(pInstr->op_str, '[');
8806 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8807 {
8808 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8809 int32_t off = -1;
8810 psz += 4;
8811 if (*psz == ']')
8812 off = 0;
8813 else if (*psz == ',')
8814 {
8815 psz = RTStrStripL(psz + 1);
8816 if (*psz == '#')
8817 off = RTStrToInt32(&psz[1]);
8818 /** @todo deal with index registers and LSL as well... */
8819 }
8820 if (off >= 0)
8821 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8822 }
8823 }
8824# endif
8825
8826 size_t const cchOp = strlen(pInstr->op_str);
8827# if defined(RT_ARCH_AMD64)
8828 if (pszAnnotation)
8829 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8830 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8831 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8832 else
8833 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8834 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8835
8836# else
8837 if (pszAnnotation)
8838 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8839 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8840 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8841 else
8842 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8843 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8844# endif
8845 offNative += pInstr->size / sizeof(*pNativeCur);
8846 cs_free(pInstr, cInstrs);
8847 }
8848 else
8849 {
8850# if defined(RT_ARCH_AMD64)
8851 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8852                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8853# else
8854 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8855# endif
8856 offNative++;
8857 }
8858# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8859 }
8860 }
8861 else
8862#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8863 {
8864 /*
8865 * No debug info, just disassemble the x86 code and then the native code.
8866 *
8867 * First the guest code:
8868 */
8869 for (unsigned i = 0; i < pTb->cRanges; i++)
8870 {
8871 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8872 + (pTb->aRanges[i].idxPhysPage == 0
8873 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8874 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8875 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8876 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8877 unsigned off = pTb->aRanges[i].offOpcodes;
8878 /** @todo this ain't working when crossing pages! */
8879 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8880 while (off < cbOpcodes)
8881 {
8882 uint32_t cbInstr = 1;
8883 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8884 &pTb->pabOpcodes[off], cbOpcodes - off,
8885 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8886 if (RT_SUCCESS(rc))
8887 {
8888 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8889 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8890 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8891 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8892 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8893 GCPhysPc += cbInstr;
8894 off += cbInstr;
8895 }
8896 else
8897 {
8898 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8899 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8900 break;
8901 }
8902 }
8903 }
8904
8905 /*
8906 * Then the native code:
8907 */
8908 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8909 while (offNative < cNative)
8910 {
8911 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8912# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8913 uint32_t cbInstr = sizeof(paNative[0]);
8914 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8915 if (RT_SUCCESS(rc))
8916 {
8917# if defined(RT_ARCH_AMD64)
8918 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8919 {
8920 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8921 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8922 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8923 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8924 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8925 uInfo & 0x8000 ? "recompiled" : "todo");
8926 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8927 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8928 else
8929 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8930 }
8931 else
8932# endif
8933 {
8934# ifdef RT_ARCH_AMD64
8935 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8936 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8937 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8938 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8939# elif defined(RT_ARCH_ARM64)
8940 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8941 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8942 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8943# else
8944# error "Port me"
8945# endif
8946 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8947 }
8948 }
8949 else
8950 {
8951# if defined(RT_ARCH_AMD64)
8952 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8953 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8954# else
8955 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8956# endif
8957 cbInstr = sizeof(paNative[0]);
8958 }
8959 offNative += cbInstr / sizeof(paNative[0]);
8960
8961# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8962 cs_insn *pInstr;
8963 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8964 (uintptr_t)pNativeCur, 1, &pInstr);
8965 if (cInstrs > 0)
8966 {
8967 Assert(cInstrs == 1);
8968# if defined(RT_ARCH_AMD64)
8969 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8970 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8971# else
8972 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8973 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8974# endif
8975 offNative += pInstr->size / sizeof(*pNativeCur);
8976 cs_free(pInstr, cInstrs);
8977 }
8978 else
8979 {
8980# if defined(RT_ARCH_AMD64)
8981 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8982                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8983# else
8984 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8985# endif
8986 offNative++;
8987 }
8988# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8989 }
8990 }
8991
8992#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8993 /* Cleanup. */
8994 cs_close(&hDisasm);
8995#endif
8996}
8997
8998
8999/**
9000 * Recompiles the given threaded TB into a native one.
9001 *
9002 * In case of failure the translation block will be returned as-is.
9003 *
9004 * @returns pTb.
9005 * @param pVCpu The cross context virtual CPU structure of the calling
9006 * thread.
9007 * @param   pTb     The threaded translation block to recompile to native.
9008 */
9009DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9010{
9011#if 0 /* For profiling the native recompiler code. */
9012l_profile_again:
9013#endif
9014 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9015
9016 /*
9017     * The first time thru, we allocate the recompiler state; the other times
9018     * we just need to reset it before using it again.
9019 */
9020 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9021 if (RT_LIKELY(pReNative))
9022 iemNativeReInit(pReNative, pTb);
9023 else
9024 {
9025 pReNative = iemNativeInit(pVCpu, pTb);
9026 AssertReturn(pReNative, pTb);
9027 }
9028
9029#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9030 /*
9031 * First do liveness analysis. This is done backwards.
9032 */
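    /* In other words: the entry for the final call is seeded as 'unused', and each call's
       liveness function then derives paLivenessEntries[idxCall - 1] from
       paLivenessEntries[idxCall]; calls without a dedicated liveness function are treated
       conservatively as exceptions/calls via IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL. */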
9033 {
9034 uint32_t idxCall = pTb->Thrd.cCalls;
9035 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9036 { /* likely */ }
9037 else
9038 {
9039 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9040 while (idxCall > cAlloc)
9041 cAlloc *= 2;
9042 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9043 AssertReturn(pvNew, pTb);
9044 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9045 pReNative->cLivenessEntriesAlloc = cAlloc;
9046 }
9047 AssertReturn(idxCall > 0, pTb);
9048 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9049
9050 /* The initial (final) entry. */
9051 idxCall--;
9052 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9053
9054 /* Loop backwards thru the calls and fill in the other entries. */
9055 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9056 while (idxCall > 0)
9057 {
9058 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9059 if (pfnLiveness)
9060 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9061 else
9062 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9063 pCallEntry--;
9064 idxCall--;
9065 }
9066
9067# ifdef VBOX_WITH_STATISTICS
9068    /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9069       to 'clobbered' rather than 'input'. */
9070 /** @todo */
9071# endif
9072 }
9073#endif
9074
9075 /*
9076 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9077 * for aborting if an error happens.
9078 */
9079 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9080#ifdef LOG_ENABLED
9081 uint32_t const cCallsOrg = cCallsLeft;
9082#endif
9083 uint32_t off = 0;
9084 int rc = VINF_SUCCESS;
9085 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9086 {
9087 /*
9088 * Emit prolog code (fixed).
9089 */
9090 off = iemNativeEmitProlog(pReNative, off);
9091
9092 /*
9093 * Convert the calls to native code.
9094 */
9095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9096 int32_t iGstInstr = -1;
9097#endif
9098#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9099 uint32_t cThreadedCalls = 0;
9100 uint32_t cRecompiledCalls = 0;
9101#endif
9102#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9103 uint32_t idxCurCall = 0;
9104#endif
9105 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9106 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9107 while (cCallsLeft-- > 0)
9108 {
9109 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9110#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9111 pReNative->idxCurCall = idxCurCall;
9112#endif
9113
9114 /*
9115 * Debug info, assembly markup and statistics.
9116 */
9117#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9118 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9119 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9120#endif
9121#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9122 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9123 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9124 {
9125 if (iGstInstr < (int32_t)pTb->cInstructions)
9126 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9127 else
9128 Assert(iGstInstr == pTb->cInstructions);
9129 iGstInstr = pCallEntry->idxInstr;
9130 }
9131 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9132#endif
9133#if defined(VBOX_STRICT)
9134 off = iemNativeEmitMarker(pReNative, off,
9135 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9136#endif
9137#if defined(VBOX_STRICT)
9138 iemNativeRegAssertSanity(pReNative);
9139#endif
9140#ifdef VBOX_WITH_STATISTICS
9141 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9142#endif
9143
9144 /*
9145 * Actual work.
9146 */
9147 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9148 pfnRecom ? "(recompiled)" : "(todo)"));
9149 if (pfnRecom) /** @todo stats on this. */
9150 {
9151 off = pfnRecom(pReNative, off, pCallEntry);
9152 STAM_REL_STATS({cRecompiledCalls++;});
9153 }
9154 else
9155 {
9156 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9157 STAM_REL_STATS({cThreadedCalls++;});
9158 }
9159 Assert(off <= pReNative->cInstrBufAlloc);
9160 Assert(pReNative->cCondDepth == 0);
9161
9162#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9163 if (LogIs2Enabled())
9164 {
9165 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9166# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9167 static const char s_achState[] = "CUXI";
9168# else
9169 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9170# endif
9171
9172 char szGpr[17];
9173 for (unsigned i = 0; i < 16; i++)
9174 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9175 szGpr[16] = '\0';
9176
9177 char szSegBase[X86_SREG_COUNT + 1];
9178 char szSegLimit[X86_SREG_COUNT + 1];
9179 char szSegAttrib[X86_SREG_COUNT + 1];
9180 char szSegSel[X86_SREG_COUNT + 1];
9181 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9182 {
9183 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9184 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9185 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9186 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9187 }
9188 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9189 = szSegSel[X86_SREG_COUNT] = '\0';
9190
9191 char szEFlags[8];
9192 for (unsigned i = 0; i < 7; i++)
9193 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9194 szEFlags[7] = '\0';
9195
9196                Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9197 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9198 }
9199#endif
9200
9201 /*
9202 * Advance.
9203 */
9204 pCallEntry++;
9205#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9206 idxCurCall++;
9207#endif
9208 }
9209
9210 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9211 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9212 if (!cThreadedCalls)
9213 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9214
9215 /*
9216 * Emit the epilog code.
9217 */
9218 uint32_t idxReturnLabel;
9219 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9220
9221 /*
9222 * Generate special jump labels.
9223 */
9224 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9225 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9226 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9227 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9228
9229 /*
9230         * Generate simple TB tail labels that just call a helper with a pVCpu
9231         * arg and either return or longjmp/throw a non-zero status.
9232 *
9233 * The array entries must be ordered by enmLabel value so we can index
9234 * using fTailLabels bit numbers.
9235 */
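        /* Sketch of what gets emitted per pending label (pseudo-assembly):
               LabelX:
                   mov  IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU
                   call g_aSimpleTailLabels[X].pfnCallback
                   jmp  idxReturnLabel */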
9236 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9237 static struct
9238 {
9239 IEMNATIVELABELTYPE enmLabel;
9240 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9241 } const g_aSimpleTailLabels[] =
9242 {
9243 { kIemNativeLabelType_Invalid, NULL },
9244 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9245 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9246 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9247 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9248 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9249 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9250 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9251 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9252 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9253 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9254 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9255 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9256 };
9257 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9258 AssertCompile(kIemNativeLabelType_Invalid == 0);
9259 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
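        /* RT_BIT_64(LastSimple + 1) - 2 sets bits 1 thru LastSimple (e.g. for a hypothetical
           LastSimple of 12: 0x2000 - 2 = 0x1ffe), i.e. all simple tail labels while excluding
           bit 0, kIemNativeLabelType_Invalid. */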
9260 if (fTailLabels)
9261 {
9262 do
9263 {
9264 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9265 fTailLabels &= ~RT_BIT_64(enmLabel);
9266 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9267
9268 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9269 Assert(idxLabel != UINT32_MAX);
9270 if (idxLabel != UINT32_MAX)
9271 {
9272 iemNativeLabelDefine(pReNative, idxLabel, off);
9273
9274 /* int pfnCallback(PVMCPUCC pVCpu) */
9275 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9276 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9277
9278 /* jump back to the return sequence. */
9279 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9280 }
9281
9282 } while (fTailLabels);
9283 }
9284 }
9285 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9286 {
9287 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9288 return pTb;
9289 }
9290 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9291 Assert(off <= pReNative->cInstrBufAlloc);
9292
9293 /*
9294     * Make sure all labels have been defined.
9295 */
9296 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9297#ifdef VBOX_STRICT
9298 uint32_t const cLabels = pReNative->cLabels;
9299 for (uint32_t i = 0; i < cLabels; i++)
9300 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9301#endif
9302
9303#if 0 /* For profiling the native recompiler code. */
9304 if (pTb->Thrd.cCalls >= 136)
9305 {
9306 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9307 goto l_profile_again;
9308 }
9309#endif
9310
9311 /*
9312 * Allocate executable memory, copy over the code we've generated.
9313 */
9314 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9315 if (pTbAllocator->pDelayedFreeHead)
9316 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9317
9318 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
9319 AssertReturn(paFinalInstrBuf, pTb);
9320 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9321
9322 /*
9323 * Apply fixups.
9324 */
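    /* Worked example (hypothetical numbers): an AMD64 Rel32 fixup recorded at off 0x100 whose
       label was defined at off 0x120 with offAddend -4 stores 0x120 - 0x100 + (-4) = 0x1c
       into the 32-bit displacement field at &paFinalInstrBuf[0x100]. */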
9325 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9326 uint32_t const cFixups = pReNative->cFixups;
9327 for (uint32_t i = 0; i < cFixups; i++)
9328 {
9329 Assert(paFixups[i].off < off);
9330 Assert(paFixups[i].idxLabel < cLabels);
9331 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9332 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9333 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9334 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9335 switch (paFixups[i].enmType)
9336 {
9337#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9338 case kIemNativeFixupType_Rel32:
9339 Assert(paFixups[i].off + 4 <= off);
9340 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9341 continue;
9342
9343#elif defined(RT_ARCH_ARM64)
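            /* These cases patch the signed, instruction-granular displacement fields of ARM64
               branches: imm26 at bits 0..25 (B/BL), imm19 at bits 5..23 (B.cond/CBZ/CBNZ) and
               imm14 at bits 5..18 (TBZ/TBNZ), as reflected by the preserve masks below. */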
9344 case kIemNativeFixupType_RelImm26At0:
9345 {
9346 Assert(paFixups[i].off < off);
9347 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9348 Assert(offDisp >= -262144 && offDisp < 262144);
9349 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9350 continue;
9351 }
9352
9353 case kIemNativeFixupType_RelImm19At5:
9354 {
9355 Assert(paFixups[i].off < off);
9356 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9357 Assert(offDisp >= -262144 && offDisp < 262144);
9358 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9359 continue;
9360 }
9361
9362 case kIemNativeFixupType_RelImm14At5:
9363 {
9364 Assert(paFixups[i].off < off);
9365 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9366 Assert(offDisp >= -8192 && offDisp < 8192);
9367 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9368 continue;
9369 }
9370
9371#endif
9372 case kIemNativeFixupType_Invalid:
9373 case kIemNativeFixupType_End:
9374 break;
9375 }
9376 AssertFailed();
9377 }
9378
9379 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9380 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9381
9382 /*
9383 * Convert the translation block.
9384 */
9385 RTMemFree(pTb->Thrd.paCalls);
9386 pTb->Native.paInstructions = paFinalInstrBuf;
9387 pTb->Native.cInstructions = off;
9388 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9389#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9390    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9391 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9392#endif
9393
9394 Assert(pTbAllocator->cThreadedTbs > 0);
9395 pTbAllocator->cThreadedTbs -= 1;
9396 pTbAllocator->cNativeTbs += 1;
9397 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9398
9399#ifdef LOG_ENABLED
9400 /*
9401 * Disassemble to the log if enabled.
9402 */
9403 if (LogIs3Enabled())
9404 {
9405 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9406 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9407# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9408 RTLogFlush(NULL);
9409# endif
9410 }
9411#endif
9412 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9413
9414 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9415 return pTb;
9416}
9417