VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@104144

Last change on this file was r104144, checked in by vboxsync, 11 months ago

VMM/IEM: Build fixes for when the TLB lookup code is disabled, bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 407.5 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104144 2024-04-03 17:54:54Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
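/*
 * Example (the exact group/flag spelling is an assumption, see VBox/log.h for
 * the authoritative syntax): a build with logging enabled could request the
 * more interesting levels above with something like
 *      VBOX_LOG=iem_re_native.e.l2.l3
 * i.e. level 2 for the recompiled calls and level 3 for the native
 * disassembly.
 */
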
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
70# include "/opt/local/include/capstone/capstone.h"
71#endif
72
73#include "IEMInline.h"
74#include "IEMThreadedFunctions.h"
75#include "IEMN8veRecompiler.h"
76#include "IEMN8veRecompilerEmit.h"
77#include "IEMN8veRecompilerTlbLookup.h"
78#include "IEMNativeFunctions.h"
79
80
81/*
82 * Narrow down configs here to avoid wasting time on unused configs.
83 * Note! Same checks in IEMAllThrdRecompiler.cpp.
84 */
85
86#ifndef IEM_WITH_CODE_TLB
87# error The code TLB must be enabled for the recompiler.
88#endif
89
90#ifndef IEM_WITH_DATA_TLB
91# error The data TLB must be enabled for the recompiler.
92#endif
93
94#ifndef IEM_WITH_SETJMP
95# error The setjmp approach must be enabled for the recompiler.
96#endif
97
98/** @todo eliminate this clang build hack. */
99#if RT_CLANG_PREREQ(4, 0)
100# pragma GCC diagnostic ignored "-Wunused-function"
101#endif
102
103
104/*********************************************************************************************************************************
105* Internal Functions *
106*********************************************************************************************************************************/
107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
108static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
109#endif
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
112 IEMNATIVEGSTREG enmGstReg, uint32_t off);
113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
130
131
132/**
133 * Used by TB code when it wants to raise a \#DE.
134 */
135IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
136{
137 iemRaiseDivideErrorJmp(pVCpu);
138#ifndef _MSC_VER
139 return VINF_IEM_RAISED_XCPT; /* not reached */
140#endif
141}
142
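/* Note: the iemRaiseXxxJmp workers longjmp out and never return; the return
   statement above merely keeps non-MSC compilers from warning about a missing
   return value, while MSC would presumably flag it as unreachable code, hence
   the #ifndef _MSC_VER. The same pattern repeats in the helpers below. */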
143
144/**
145 * Used by TB code when it wants to raise a \#UD.
146 */
147IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
148{
149 iemRaiseUndefinedOpcodeJmp(pVCpu);
150#ifndef _MSC_VER
151 return VINF_IEM_RAISED_XCPT; /* not reached */
152#endif
153}
154
155
156/**
157 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
158 *
159 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
160 */
161IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
162{
163 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
164 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
165 iemRaiseUndefinedOpcodeJmp(pVCpu);
166 else
167 iemRaiseDeviceNotAvailableJmp(pVCpu);
168#ifndef _MSC_VER
169 return VINF_IEM_RAISED_XCPT; /* not reached */
170#endif
171}
172
173
174/**
175 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
176 *
177 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
178 */
179IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
180{
181 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
182 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
183 iemRaiseUndefinedOpcodeJmp(pVCpu);
184 else
185 iemRaiseDeviceNotAvailableJmp(pVCpu);
186#ifndef _MSC_VER
187 return VINF_IEM_RAISED_XCPT; /* not reached */
188#endif
189}
190
191
192/**
193 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
194 *
195 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
196 */
197IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
198{
199 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
200 iemRaiseSimdFpExceptionJmp(pVCpu);
201 else
202 iemRaiseUndefinedOpcodeJmp(pVCpu);
203#ifndef _MSC_VER
204 return VINF_IEM_RAISED_XCPT; /* not reached */
205#endif
206}
207
208
209/**
210 * Used by TB code when it wants to raise a \#NM.
211 */
212IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
213{
214 iemRaiseDeviceNotAvailableJmp(pVCpu);
215#ifndef _MSC_VER
216 return VINF_IEM_RAISED_XCPT; /* not reached */
217#endif
218}
219
220
221/**
222 * Used by TB code when it wants to raise a \#GP(0).
223 */
224IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
225{
226 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
227#ifndef _MSC_VER
228 return VINF_IEM_RAISED_XCPT; /* not reached */
229#endif
230}
231
232
233/**
234 * Used by TB code when it wants to raise a \#MF.
235 */
236IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
237{
238 iemRaiseMathFaultJmp(pVCpu);
239#ifndef _MSC_VER
240 return VINF_IEM_RAISED_XCPT; /* not reached */
241#endif
242}
243
244
245/**
246 * Used by TB code when it wants to raise a \#XF.
247 */
248IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
249{
250 iemRaiseSimdFpExceptionJmp(pVCpu);
251#ifndef _MSC_VER
252 return VINF_IEM_RAISED_XCPT; /* not reached */
253#endif
254}
255
256
257/**
258 * Used by TB code when detecting opcode changes.
259 * @see iemThreadeFuncWorkerObsoleteTb
260 */
261IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
262{
263 /* We set fSafeToFree to false because we're being called in the context
264 of a TB callback function, which for native TBs means we cannot release
265 the executable memory till we've returned our way back to iemTbExec, as
266 that return path goes via the native code generated for the TB. */
267 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
268 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
269 return VINF_IEM_REEXEC_BREAK;
270}
271
272
273/**
274 * Used by TB code when we need to switch to a TB with CS.LIM checking.
275 */
276IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
277{
278 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
279 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
280 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
281 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
282 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
283 return VINF_IEM_REEXEC_BREAK;
284}
285
286
287/**
288 * Used by TB code when we missed a PC check after a branch.
289 */
290IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
291{
292 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
293 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
294 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
295 pVCpu->iem.s.pbInstrBuf));
296 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
297 return VINF_IEM_REEXEC_BREAK;
298}
299
300
301
302/*********************************************************************************************************************************
303* Helpers: Segmented memory fetches and stores. *
304*********************************************************************************************************************************/
305
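/*
 * Pattern note: when IEMNATIVE_WITH_TLB_LOOKUP_FETCH (and the corresponding
 * _STORE/_PUSH/_POP/_MAPPED defines further down) is in effect, the TLB
 * lookup is emitted inline by the recompiler and these helpers act as the
 * out-of-line fallback, going straight to the iemMemXxxSafeJmp workers;
 * otherwise they simply forward to the regular iemMemXxxJmp workers.
 */
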
306/**
307 * Used by TB code to load unsigned 8-bit data w/ segmentation.
308 */
309IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
310{
311#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
312 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
313#else
314 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
315#endif
316}
317
318
319/**
320 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
321 * to 16 bits.
322 */
323IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
324{
325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
326 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
327#else
328 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
329#endif
330}
331
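/* For example, fetching the byte 0x80 yields (int8_t)-128, which the cast
   chain above turns into (uint16_t)0xff80 and finally (uint64_t)0xff80, i.e.
   sign extended to 16 bits and then zero extended to 64 bits. */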
332
333/**
334 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
335 * to 32 bits.
336 */
337IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
338{
339#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
340 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
341#else
342 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
343#endif
344}
345
346/**
347 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
348 * to 64 bits.
349 */
350IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
351{
352#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
353 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
354#else
355 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
356#endif
357}
358
359
360/**
361 * Used by TB code to load unsigned 16-bit data w/ segmentation.
362 */
363IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
364{
365#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
366 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
367#else
368 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
369#endif
370}
371
372
373/**
374 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
375 * to 32 bits.
376 */
377IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
378{
379#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
380 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
381#else
382 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
383#endif
384}
385
386
387/**
388 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
389 * to 64 bits.
390 */
391IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
392{
393#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
394 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
395#else
396 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
397#endif
398}
399
400
401/**
402 * Used by TB code to load unsigned 32-bit data w/ segmentation.
403 */
404IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
405{
406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
407 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
408#else
409 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
410#endif
411}
412
413
414/**
415 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
416 * to 64 bits.
417 */
418IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
419{
420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
421 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
422#else
423 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
424#endif
425}
426
427
428/**
429 * Used by TB code to load unsigned 64-bit data w/ segmentation.
430 */
431IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
432{
433#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
434 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
435#else
436 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
437#endif
438}
439
440
441#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
442/**
443 * Used by TB code to load 128-bit data w/ segmentation.
444 */
445IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
446{
447#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
448 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
449#else
450 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
451#endif
452}
453
454
455/**
456 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment.
457 */
458IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
459{
460#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
461 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
462#else
463 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
464#endif
465}
466
467
468/**
469 * Used by TB code to load 128-bit data w/ segmentation, without alignment checking.
470 */
471IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
472{
473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
474 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
475#else
476 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
477#endif
478}
479
480
481/**
482 * Used by TB code to load 256-bit data w/ segmentation, without alignment checking.
483 */
484IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
485{
486#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
487 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
488#else
489 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
490#endif
491}
492
493
494/**
495 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment.
496 */
497IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
498{
499#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
500 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
501#else
502 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
503#endif
504}
505#endif
506
507
508/**
509 * Used by TB code to store unsigned 8-bit data w/ segmentation.
510 */
511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
512{
513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
514 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
515#else
516 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
517#endif
518}
519
520
521/**
522 * Used by TB code to store unsigned 16-bit data w/ segmentation.
523 */
524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
525{
526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
527 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
528#else
529 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
530#endif
531}
532
533
534/**
535 * Used by TB code to store unsigned 32-bit data w/ segmentation.
536 */
537IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
538{
539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
540 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
541#else
542 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
543#endif
544}
545
546
547/**
548 * Used by TB code to store unsigned 64-bit data w/ segmentation.
549 */
550IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
551{
552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
553 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
554#else
555 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
556#endif
557}
558
559
560#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
561/**
562 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment.
563 */
564IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
565{
566#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
567 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
568#else
569 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
570#endif
571}
572
573
574/**
575 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checking.
576 */
577IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
578{
579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
580 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
581#else
582 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
583#endif
584}
585
586
587/**
588 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checking.
589 */
590IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
591{
592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
593 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
594#else
595 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
596#endif
597}
598
599
600/**
601 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment.
602 */
603IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
604{
605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
606 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
607#else
608 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
609#endif
610}
611#endif
612
613
614
615/**
616 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
617 */
618IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
619{
620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
621 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
622#else
623 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
624#endif
625}
626
627
628/**
629 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
630 */
631IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
632{
633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
634 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
635#else
636 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
637#endif
638}
639
640
641/**
642 * Used by TB code to store a 32-bit selector value onto a generic stack.
643 *
644 * Intel CPUs don't write a whole dword, thus the special function.
645 */
646IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
647{
648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
649 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
650#else
651 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
652#endif
653}
654
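/* That is, when pushing a segment register with a 32-bit operand size, Intel
   CPUs write only the low 16 bits (the selector) and leave the upper half of
   the stack slot untouched, which is what the SReg worker replicates. */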
655
656/**
657 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
658 */
659IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
660{
661#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
662 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
663#else
664 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
665#endif
666}
667
668
669/**
670 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
671 */
672IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
673{
674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
675 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
676#else
677 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
678#endif
679}
680
681
682/**
683 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
684 */
685IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
686{
687#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
688 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
689#else
690 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
691#endif
692}
693
694
695/**
696 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
697 */
698IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
699{
700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
701 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
702#else
703 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
704#endif
705}
706
707
708
709/*********************************************************************************************************************************
710* Helpers: Flat memory fetches and stores. *
711*********************************************************************************************************************************/
712
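/*
 * These are the flat-address counterparts of the helpers above: UINT8_MAX is
 * passed as iSegReg to the iemMemXxxSafeJmp workers (i.e. no segment register
 * is applied), or the dedicated iemMemFlatXxxJmp workers are used instead.
 */
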
713/**
714 * Used by TB code to load unsigned 8-bit data w/ flat address.
715 * @note Zero extending the value to 64-bit to simplify assembly.
716 */
717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
721#else
722 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
729 * to 16 bits.
730 * @note Zero extending the value to 64-bit to simplify assembly.
731 */
732IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
733{
734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
735 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
736#else
737 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
738#endif
739}
740
741
742/**
743 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
744 * to 32 bits.
745 * @note Zero extending the value to 64-bit to simplify assembly.
746 */
747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
748{
749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
750 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
751#else
752 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
753#endif
754}
755
756
757/**
758 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
759 * to 64 bits.
760 */
761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
762{
763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
764 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
765#else
766 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
767#endif
768}
769
770
771/**
772 * Used by TB code to load unsigned 16-bit data w/ flat address.
773 * @note Zero extending the value to 64-bit to simplify assembly.
774 */
775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
776{
777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
778 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
779#else
780 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
781#endif
782}
783
784
785/**
786 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
787 * to 32 bits.
788 * @note Zero extending the value to 64-bit to simplify assembly.
789 */
790IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
791{
792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
793 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
794#else
795 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
796#endif
797}
798
799
800/**
801 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
802 * to 64 bits.
803 * @note Zero extending the value to 64-bit to simplify assembly.
804 */
805IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
806{
807#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
808 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
809#else
810 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
811#endif
812}
813
814
815/**
816 * Used by TB code to load unsigned 32-bit data w/ flat address.
817 * @note Zero extending the value to 64-bit to simplify assembly.
818 */
819IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
820{
821#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
822 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
823#else
824 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
825#endif
826}
827
828
829/**
830 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
831 * to 64 bits.
832 * @note Zero extending the value to 64-bit to simplify assembly.
833 */
834IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
835{
836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
837 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
838#else
839 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
840#endif
841}
842
843
844/**
845 * Used by TB code to load unsigned 64-bit data w/ flat address.
846 */
847IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
848{
849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
850 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
851#else
852 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
853#endif
854}
855
856
857#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
858/**
859 * Used by TB code to load unsigned 128-bit data w/ flat address.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
864 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
865#else
866 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
867#endif
868}
869
870
871/**
872 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
877 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
878#else
879 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
880#endif
881}
882
883
884/**
885 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checking.
886 */
887IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
888{
889#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
890 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
891#else
892 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
893#endif
894}
895
896
897/**
898 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checking.
899 */
900IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
901{
902#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
903 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
904#else
905 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
906#endif
907}
908
909
910/**
911 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment.
912 */
913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
914{
915#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
916 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
917#else
918 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
919#endif
920}
921#endif
922
923
924/**
925 * Used by TB code to store unsigned 8-bit data w/ flat address.
926 */
927IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
928{
929#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
930 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
931#else
932 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
933#endif
934}
935
936
937/**
938 * Used by TB code to store unsigned 16-bit data w/ flat address.
939 */
940IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
941{
942#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
943 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
944#else
945 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
946#endif
947}
948
949
950/**
951 * Used by TB code to store unsigned 32-bit data w/ flat address.
952 */
953IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
954{
955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
956 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
957#else
958 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
959#endif
960}
961
962
963/**
964 * Used by TB code to store unsigned 64-bit data w/ flat address.
965 */
966IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
967{
968#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
969 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
970#else
971 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
972#endif
973}
974
975
976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
977/**
978 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment.
979 */
980IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
981{
982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
983 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
984#else
985 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
986#endif
987}
988
989
990/**
991 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checking.
992 */
993IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
994{
995#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
996 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
997#else
998 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
999#endif
1000}
1001
1002
1003/**
1004 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checking.
1005 */
1006IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1007{
1008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1009 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1010#else
1011 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1012#endif
1013}
1014
1015
1016/**
1017 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1022 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1023#else
1024 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1025#endif
1026}
1027#endif
1028
1029
1030
1031/**
1032 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1033 */
1034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1035{
1036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1037 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1038#else
1039 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1040#endif
1041}
1042
1043
1044/**
1045 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1050 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1051#else
1052 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to store a segment selector value onto a flat stack.
1059 *
1060 * Intel CPUs don't write a whole dword, thus the special function.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1065 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1066#else
1067 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1074 */
1075IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1076{
1077#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1078 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1079#else
1080 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1081#endif
1082}
1083
1084
1085/**
1086 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1087 */
1088IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1089{
1090#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1091 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1092#else
1093 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1094#endif
1095}
1096
1097
1098/**
1099 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1100 */
1101IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1102{
1103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1104 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1105#else
1106 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1107#endif
1108}
1109
1110
1111/**
1112 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1113 */
1114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1115{
1116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1117 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1118#else
1119 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1120#endif
1121}
1122
1123
1124
1125/*********************************************************************************************************************************
1126* Helpers: Segmented memory mapping. *
1127*********************************************************************************************************************************/
1128
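/*
 * The mapping helpers return a host pointer to the guest data plus an opaque
 * unmap cookie via pbUnmapInfo; the generated code is expected to hand that
 * cookie back to one of the commit-and-unmap helpers at the end of this
 * section group once it is done with the mapping.
 */
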
1129/**
1130 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1131 * segmentation.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1134 RTGCPTR GCPtrMem, uint8_t iSegReg))
1135{
1136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1137 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1138#else
1139 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1140#endif
1141}
1142
1143
1144/**
1145 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1146 */
1147IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1148 RTGCPTR GCPtrMem, uint8_t iSegReg))
1149{
1150#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1151 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1152#else
1153 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1154#endif
1155}
1156
1157
1158/**
1159 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1160 */
1161IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1162 RTGCPTR GCPtrMem, uint8_t iSegReg))
1163{
1164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1165 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1166#else
1167 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1168#endif
1169}
1170
1171
1172/**
1173 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1174 */
1175IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1176 RTGCPTR GCPtrMem, uint8_t iSegReg))
1177{
1178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1179 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1180#else
1181 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1182#endif
1183}
1184
1185
1186/**
1187 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1188 * segmentation.
1189 */
1190IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1191 RTGCPTR GCPtrMem, uint8_t iSegReg))
1192{
1193#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1194 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1195#else
1196 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1197#endif
1198}
1199
1200
1201/**
1202 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1203 */
1204IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1205 RTGCPTR GCPtrMem, uint8_t iSegReg))
1206{
1207#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1208 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1209#else
1210 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1211#endif
1212}
1213
1214
1215/**
1216 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1217 */
1218IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1219 RTGCPTR GCPtrMem, uint8_t iSegReg))
1220{
1221#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1222 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1223#else
1224 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1225#endif
1226}
1227
1228
1229/**
1230 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1231 */
1232IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1233 RTGCPTR GCPtrMem, uint8_t iSegReg))
1234{
1235#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1236 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1237#else
1238 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1239#endif
1240}
1241
1242
1243/**
1244 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1245 * segmentation.
1246 */
1247IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1248 RTGCPTR GCPtrMem, uint8_t iSegReg))
1249{
1250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1251 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1252#else
1253 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1254#endif
1255}
1256
1257
1258/**
1259 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1260 */
1261IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1262 RTGCPTR GCPtrMem, uint8_t iSegReg))
1263{
1264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1265 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1266#else
1267 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1268#endif
1269}
1270
1271
1272/**
1273 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1274 */
1275IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1276 RTGCPTR GCPtrMem, uint8_t iSegReg))
1277{
1278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1279 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1280#else
1281 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1282#endif
1283}
1284
1285
1286/**
1287 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1288 */
1289IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1290 RTGCPTR GCPtrMem, uint8_t iSegReg))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1293 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1294#else
1295 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1296#endif
1297}
1298
1299
1300/**
1301 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1302 * segmentation.
1303 */
1304IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1305 RTGCPTR GCPtrMem, uint8_t iSegReg))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1308 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1309#else
1310 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1319 RTGCPTR GCPtrMem, uint8_t iSegReg))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1322 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1323#else
1324 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1331 */
1332IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1333 RTGCPTR GCPtrMem, uint8_t iSegReg))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1336 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1337#else
1338 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1347 RTGCPTR GCPtrMem, uint8_t iSegReg))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1350 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1351#else
1352 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1361 RTGCPTR GCPtrMem, uint8_t iSegReg))
1362{
1363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1364 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1365#else
1366 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1367#endif
1368}
1369
1370
1371/**
1372 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1373 */
1374IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1375 RTGCPTR GCPtrMem, uint8_t iSegReg))
1376{
1377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1378 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1379#else
1380 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1381#endif
1382}
1383
1384
1385/**
1386 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1387 * segmentation.
1388 */
1389IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1390 RTGCPTR GCPtrMem, uint8_t iSegReg))
1391{
1392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1393 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1394#else
1395 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1396#endif
1397}
1398
1399
1400/**
1401 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1402 */
1403IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1404 RTGCPTR GCPtrMem, uint8_t iSegReg))
1405{
1406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1407 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1408#else
1409 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#endif
1411}
1412
1413
1414/**
1415 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1416 */
1417IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1418 RTGCPTR GCPtrMem, uint8_t iSegReg))
1419{
1420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1421 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1422#else
1423 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#endif
1425}
1426
1427
1428/**
1429 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1430 */
1431IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1432 RTGCPTR GCPtrMem, uint8_t iSegReg))
1433{
1434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1435 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1436#else
1437 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#endif
1439}
1440
1441
1442/*********************************************************************************************************************************
1443* Helpers: Flat memory mapping. *
1444*********************************************************************************************************************************/
1445
1446/**
1447 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1448 * address.
1449 */
1450IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1451{
1452#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1453 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1454#else
1455 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1456#endif
1457}
1458
1459
1460/**
1461 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1467#else
1468 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1480#else
1481 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1490{
1491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1492 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1493#else
1494 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1495#endif
1496}
1497
1498
1499/**
1500 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1501 * address.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1504{
1505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1506 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1507#else
1508 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1509#endif
1510}
1511
1512
1513/**
1514 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1515 */
1516IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1517{
1518#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1519 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1520#else
1521 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1522#endif
1523}
1524
1525
1526/**
1527 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1528 */
1529IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1530{
1531#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1532 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1533#else
1534 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1535#endif
1536}
1537
1538
1539/**
1540 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1541 */
1542IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1543{
1544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1545 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1546#else
1547 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1548#endif
1549}
1550
1551
1552/**
1553 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1554 * address.
1555 */
1556IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1557{
1558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1559 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1560#else
1561 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1562#endif
1563}
1564
1565
1566/**
1567 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1568 */
1569IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1570{
1571#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1572 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1573#else
1574 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1575#endif
1576}
1577
1578
1579/**
1580 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1581 */
1582IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1583{
1584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1585 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1586#else
1587 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1588#endif
1589}
1590
1591
1592/**
1593 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1594 */
1595IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1596{
1597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1598 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1599#else
1600 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1607 * address.
1608 */
1609IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1610{
1611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1612 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1613#else
1614 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1615#endif
1616}
1617
1618
1619/**
1620 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1621 */
1622IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1623{
1624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1625 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1626#else
1627 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1628#endif
1629}
1630
1631
1632/**
1633 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1634 */
1635IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1636{
1637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1638 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1639#else
1640 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1641#endif
1642}
1643
1644
1645/**
1646 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1647 */
1648IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1649{
1650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1651 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1652#else
1653 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1665#else
1666 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1675{
1676#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1677 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1678#else
1679 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1680#endif
1681}
1682
1683
1684/**
1685 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1686 * address.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1689{
1690#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1691 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1692#else
1693 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1694#endif
1695}
1696
1697
1698/**
1699 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1704 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1705#else
1706 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1707#endif
1708}
1709
1710
1711/**
1712 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1713 */
1714IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1715{
1716#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1717 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1718#else
1719 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1720#endif
1721}
1722
1723
1724/**
1725 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1726 */
1727IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1728{
1729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1730 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1731#else
1732 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1733#endif
1734}
1735
1736
1737/*********************************************************************************************************************************
1738* Helpers: Commit, rollback & unmap *
1739*********************************************************************************************************************************/
1740
1741/**
1742 * Used by TB code to commit and unmap an atomic read-write memory mapping.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1745{
1746 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
1747}
1748
1749
1750/**
1751 * Used by TB code to commit and unmap a read-write memory mapping.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1754{
1755 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
1756}
1757
1758
1759/**
1760 * Used by TB code to commit and unmap a write-only memory mapping.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1763{
1764 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
1765}
1766
1767
1768/**
1769 * Used by TB code to commit and unmap a read-only memory mapping.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1772{
1773 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
1774}
1775
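/*
 * Illustrative sketch: the map helpers above and the commit-and-unmap helpers
 * below are always used as a pair by the recompiled TB code.  The plain C
 * snippet here only shows the calling contract; in the real TB the calls are
 * emitted as native code and GCPtrMem comes from the recompiled instruction's
 * effective address calculation, so the wrapper function name is made up.
 */
#if 0 /* illustrative sketch only */
static void iemNativeExampleRmwDword(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t   bUnmapInfo;
    uint32_t *pu32 = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
    *pu32 += 1;                                         /* operate on the mapped guest dword */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo); /* commit the write and release the mapping */
}
#endif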
1776
1777/**
1778 * Reinitializes the native recompiler state.
1779 *
1780 * Called before starting a new recompile job.
1781 */
1782static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1783{
1784 pReNative->cLabels = 0;
1785 pReNative->bmLabelTypes = 0;
1786 pReNative->cFixups = 0;
1787#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1788 pReNative->pDbgInfo->cEntries = 0;
1789 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
1790#endif
1791 pReNative->pTbOrg = pTb;
1792 pReNative->cCondDepth = 0;
1793 pReNative->uCondSeqNo = 0;
1794 pReNative->uCheckIrqSeqNo = 0;
1795 pReNative->uTlbSeqNo = 0;
1796
1797#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1798 pReNative->Core.offPc = 0;
1799 pReNative->Core.cInstrPcUpdateSkipped = 0;
1800#endif
1801#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1802 pReNative->fSimdRaiseXcptChecksEmitted = 0;
1803#endif
1804 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1805#if IEMNATIVE_HST_GREG_COUNT < 32
1806 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1807#endif
1808 ;
1809 pReNative->Core.bmHstRegsWithGstShadow = 0;
1810 pReNative->Core.bmGstRegShadows = 0;
1811#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1812 pReNative->Core.bmGstRegShadowDirty = 0;
1813#endif
1814 pReNative->Core.bmVars = 0;
1815 pReNative->Core.bmStack = 0;
1816 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1817 pReNative->Core.u64ArgVars = UINT64_MAX;
1818
1819 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
1820 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1821 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1822 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1823 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1824 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1825 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1826 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
1827 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
1828 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
1829 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
1830 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
1831 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
1832 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
1833 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
1834 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
1835 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
1836 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
1837
1838 /* Full host register reinit: */
1839 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1840 {
1841 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1842 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1843 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1844 }
1845
1846 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1847 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1848#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1849 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1850#endif
1851#ifdef IEMNATIVE_REG_FIXED_TMP0
1852 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1853#endif
1854#ifdef IEMNATIVE_REG_FIXED_TMP1
1855 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
1856#endif
1857#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1858 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
1859#endif
1860 );
1861 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1862 {
1863 fRegs &= ~RT_BIT_32(idxReg);
1864 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1865 }
1866
1867 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1868#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1869 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1870#endif
1871#ifdef IEMNATIVE_REG_FIXED_TMP0
1872 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1873#endif
1874#ifdef IEMNATIVE_REG_FIXED_TMP1
1875 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
1876#endif
1877#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1878 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
1879#endif
1880
1881#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1882 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
1883# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
1884 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
1885# endif
1886 ;
1887 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
1888 pReNative->Core.bmGstSimdRegShadows = 0;
1889 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
1890 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
1891
1892 /* Full host register reinit: */
1893 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
1894 {
1895 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
1896 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
1897 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
1898 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
1899 }
1900
1901 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
1902 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1903 {
1904 fRegs &= ~RT_BIT_32(idxReg);
1905 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1906 }
1907
1908#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
1909 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1910#endif
1911
1912#endif
1913
1914 return pReNative;
1915}
1916
1917
1918/**
1919 * Allocates and initializes the native recompiler state.
1920 *
1921 * This is called the first time an EMT wants to recompile something.
1922 *
1923 * @returns Pointer to the new recompiler state.
1924 * @param pVCpu The cross context virtual CPU structure of the calling
1925 * thread.
1926 * @param pTb The TB that's about to be recompiled.
1927 * @thread EMT(pVCpu)
1928 */
1929static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1930{
1931 VMCPU_ASSERT_EMT(pVCpu);
1932
1933 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1934 AssertReturn(pReNative, NULL);
1935
1936 /*
1937 * Try allocate all the buffers and stuff we need.
1938 */
1939 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1940 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1941 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1942#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1943 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1944#endif
1945 if (RT_LIKELY( pReNative->pInstrBuf
1946 && pReNative->paLabels
1947 && pReNative->paFixups)
1948#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1949 && pReNative->pDbgInfo
1950#endif
1951 )
1952 {
1953 /*
1954 * Set the buffer & array sizes on success.
1955 */
1956 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1957 pReNative->cLabelsAlloc = _8K;
1958 pReNative->cFixupsAlloc = _16K;
1959#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1960 pReNative->cDbgInfoAlloc = _16K;
1961#endif
1962
1963 /* Other constant stuff: */
1964 pReNative->pVCpu = pVCpu;
1965
1966 /*
1967 * Done, just need to save it and reinit it.
1968 */
1969 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1970 return iemNativeReInit(pReNative, pTb);
1971 }
1972
1973 /*
1974 * Failed. Cleanup and return.
1975 */
1976 AssertFailed();
1977 RTMemFree(pReNative->pInstrBuf);
1978 RTMemFree(pReNative->paLabels);
1979 RTMemFree(pReNative->paFixups);
1980#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1981 RTMemFree(pReNative->pDbgInfo);
1982#endif
1983 RTMemFree(pReNative);
1984 return NULL;
1985}
1986
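/*
 * Illustrative sketch: how a caller would typically pick between iemNativeInit
 * and iemNativeReInit using the cached per-EMT state pointer stored above.
 * The surrounding recompile entry point and the pTb variable are assumed
 * context, not taken from this file.
 */
#if 0 /* illustrative sketch only */
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        pReNative = iemNativeReInit(pReNative, pTb);    /* reuse the per-EMT state */
    else
    {
        pReNative = iemNativeInit(pVCpu, pTb);          /* first recompilation on this EMT */
        AssertReturn(pReNative, NULL);                  /* out of memory */
    }
#endif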
1987
1988/**
1989 * Creates a label.
1990 *
1991 * If the label does not yet have a defined position,
1992 * call iemNativeLabelDefine() later to set it.
1993 *
1994 * @returns Label ID. Throws VBox status code on failure, so no need to check
1995 * the return value.
1996 * @param pReNative The native recompile state.
1997 * @param enmType The label type.
1998 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1999 * label is not yet defined (default).
2000 * @param uData Data associated with the label. Only applicable to
2001 * certain types of labels. Default is zero.
2002 */
2003DECL_HIDDEN_THROW(uint32_t)
2004iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2005 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2006{
2007 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2008
2009 /*
2010 * Locate existing label definition.
2011 *
2012 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2013 * and uData is zero.
2014 */
2015 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2016 uint32_t const cLabels = pReNative->cLabels;
2017 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2018#ifndef VBOX_STRICT
2019 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2020 && offWhere == UINT32_MAX
2021 && uData == 0
2022#endif
2023 )
2024 {
2025#ifndef VBOX_STRICT
2026 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2027 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2028 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2029 if (idxLabel < pReNative->cLabels)
2030 return idxLabel;
2031#else
2032 for (uint32_t i = 0; i < cLabels; i++)
2033 if ( paLabels[i].enmType == enmType
2034 && paLabels[i].uData == uData)
2035 {
2036 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2037 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2038 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2039 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2040 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2041 return i;
2042 }
2043 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2044 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2045#endif
2046 }
2047
2048 /*
2049 * Make sure we've got room for another label.
2050 */
2051 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2052 { /* likely */ }
2053 else
2054 {
2055 uint32_t cNew = pReNative->cLabelsAlloc;
2056 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2057 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2058 cNew *= 2;
2059 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2060 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2061 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2062 pReNative->paLabels = paLabels;
2063 pReNative->cLabelsAlloc = cNew;
2064 }
2065
2066 /*
2067 * Define a new label.
2068 */
2069 paLabels[cLabels].off = offWhere;
2070 paLabels[cLabels].enmType = enmType;
2071 paLabels[cLabels].uData = uData;
2072 pReNative->cLabels = cLabels + 1;
2073
2074 Assert((unsigned)enmType < 64);
2075 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2076
2077 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2078 {
2079 Assert(uData == 0);
2080 pReNative->aidxUniqueLabels[enmType] = cLabels;
2081 }
2082
2083 if (offWhere != UINT32_MAX)
2084 {
2085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2086 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2087 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2088#endif
2089 }
2090 return cLabels;
2091}
2092
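/*
 * Illustrative sketch of the label lifecycle.  The enum values used here,
 * kIemNativeLabelType_Else and kIemNativeFixupType_Rel32, and the -4 addend
 * are assumptions for the example rather than values taken from this file.
 */
#if 0 /* illustrative sketch only */
    /* 1. Forward declare the label; its offset stays UINT32_MAX for now. */
    uint32_t const idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else);
    /* 2. Emit a branch towards it and record a fixup for the displacement. */
    iemNativeAddFixup(pReNative, off, idxLabelElse, kIemNativeFixupType_Rel32, -4);
    /* ... emit the code that gets jumped over ... */
    /* 3. Define the label once the target position is known. */
    iemNativeLabelDefine(pReNative, idxLabelElse, off);
#endif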
2093
2094/**
2095 * Defines the location of an existing label.
2096 *
2097 * @param pReNative The native recompile state.
2098 * @param idxLabel The label to define.
2099 * @param offWhere The position.
2100 */
2101DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2102{
2103 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2104 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2105 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2106 pLabel->off = offWhere;
2107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2108 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2109 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2110#endif
2111}
2112
2113
2114/**
2115 * Looks up a label.
2116 *
2117 * @returns Label ID if found, UINT32_MAX if not.
2118 */
2119static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2120 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2121{
2122 Assert((unsigned)enmType < 64);
2123 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2124 {
2125 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2126 return pReNative->aidxUniqueLabels[enmType];
2127
2128 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2129 uint32_t const cLabels = pReNative->cLabels;
2130 for (uint32_t i = 0; i < cLabels; i++)
2131 if ( paLabels[i].enmType == enmType
2132 && paLabels[i].uData == uData
2133 && ( paLabels[i].off == offWhere
2134 || offWhere == UINT32_MAX
2135 || paLabels[i].off == UINT32_MAX))
2136 return i;
2137 }
2138 return UINT32_MAX;
2139}
2140
2141
2142/**
2143 * Adds a fixup.
2144 *
2145 * @throws VBox status code (int) on failure.
2146 * @param pReNative The native recompile state.
2147 * @param offWhere The instruction offset of the fixup location.
2148 * @param idxLabel The target label ID for the fixup.
2149 * @param enmType The fixup type.
2150 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2151 */
2152DECL_HIDDEN_THROW(void)
2153iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2154 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2155{
2156 Assert(idxLabel <= UINT16_MAX);
2157 Assert((unsigned)enmType <= UINT8_MAX);
2158#ifdef RT_ARCH_ARM64
2159 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2160 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2161 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2162#endif
2163
2164 /*
2165 * Make sure we've got room.
2166 */
2167 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2168 uint32_t const cFixups = pReNative->cFixups;
2169 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2170 { /* likely */ }
2171 else
2172 {
2173 uint32_t cNew = pReNative->cFixupsAlloc;
2174 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2175 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2176 cNew *= 2;
2177 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2178 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2179 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2180 pReNative->paFixups = paFixups;
2181 pReNative->cFixupsAlloc = cNew;
2182 }
2183
2184 /*
2185 * Add the fixup.
2186 */
2187 paFixups[cFixups].off = offWhere;
2188 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2189 paFixups[cFixups].enmType = enmType;
2190 paFixups[cFixups].offAddend = offAddend;
2191 pReNative->cFixups = cFixups + 1;
2192}
2193
2194
2195/**
2196 * Slow code path for iemNativeInstrBufEnsure.
2197 */
2198DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2199{
2200 /* Double the buffer size till we meet the request. */
2201 uint32_t cNew = pReNative->cInstrBufAlloc;
2202 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2203 do
2204 cNew *= 2;
2205 while (cNew < off + cInstrReq);
2206
2207 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2208#ifdef RT_ARCH_ARM64
2209 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2210#else
2211 uint32_t const cbMaxInstrBuf = _2M;
2212#endif
2213 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2214
2215 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2216 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2217
2218#ifdef VBOX_STRICT
2219 pReNative->offInstrBufChecked = off + cInstrReq;
2220#endif
2221 pReNative->cInstrBufAlloc = cNew;
2222 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2223}
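
/*
 * Illustrative sketch: emitters are expected to call the inline fast path,
 * iemNativeInstrBufEnsure (declared in the recompiler header), before writing
 * instructions; it falls back to the slow path above only when the buffer is
 * too small.  The signature is assumed to mirror the slow path, and the
 * worst-case instruction count below is a made-up number.
 */
#if 0 /* illustrative sketch only */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16 /*cInstrReq, worst case*/);
    /* ... write at most 16 instructions via pCodeBuf[off++] ... */
#endif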
2224
2225#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2226
2227/**
2228 * Grows the static debug info array used during recompilation.
2229 *
2230 * @returns Pointer to the new debug info block; throws VBox status code on
2231 * failure, so no need to check the return value.
2232 */
2233DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2234{
2235 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2236 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2237 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2238 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2239 pReNative->pDbgInfo = pDbgInfo;
2240 pReNative->cDbgInfoAlloc = cNew;
2241 return pDbgInfo;
2242}
2243
2244
2245/**
2246 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2247 */
2248DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2249{
2250 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2251 { /* likely */ }
2252 else
2253 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2254 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2255}
2256
2257
2258/**
2259 * Debug Info: Adds a native offset record, if necessary.
2260 */
2261DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2262{
2263 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2264
2265 /*
2266 * Do we need this one?
2267 */
2268 uint32_t const offPrev = pDbgInfo->offNativeLast;
2269 if (offPrev == off)
2270 return;
2271 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2272
2273 /*
2274 * Add it.
2275 */
2276 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2277 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2278 pEntry->NativeOffset.offNative = off;
2279 pDbgInfo->offNativeLast = off;
2280}
2281
2282
2283/**
2284 * Debug Info: Record info about a label.
2285 */
2286static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2287{
2288 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2289 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2290 pEntry->Label.uUnused = 0;
2291 pEntry->Label.enmLabel = (uint8_t)enmType;
2292 pEntry->Label.uData = uData;
2293}
2294
2295
2296/**
2297 * Debug Info: Record info about a threaded call.
2298 */
2299static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2300{
2301 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2302 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2303 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2304 pEntry->ThreadedCall.uUnused = 0;
2305 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2306}
2307
2308
2309/**
2310 * Debug Info: Record info about a new guest instruction.
2311 */
2312static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2313{
2314 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2315 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2316 pEntry->GuestInstruction.uUnused = 0;
2317 pEntry->GuestInstruction.fExec = fExec;
2318}
2319
2320
2321/**
2322 * Debug Info: Record info about guest register shadowing.
2323 */
2324DECL_HIDDEN_THROW(void)
2325iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2326 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2327{
2328 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2329 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2330 pEntry->GuestRegShadowing.uUnused = 0;
2331 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2332 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2333 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2334#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2335 Assert( idxHstReg != UINT8_MAX
2336 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2337#endif
2338}
2339
2340
2341# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2342/**
2343 * Debug Info: Record info about guest SIMD register shadowing.
2344 */
2345DECL_HIDDEN_THROW(void)
2346iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2347 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2348{
2349 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2350 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2351 pEntry->GuestSimdRegShadowing.uUnused = 0;
2352 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2353 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2354 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2355}
2356# endif
2357
2358
2359# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2360/**
2361 * Debug Info: Record info about delayed RIP updates.
2362 */
2363DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2364{
2365 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2366 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2367 pEntry->DelayedPcUpdate.offPc = offPc;
2368 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2369}
2370# endif
2371
2372# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2373
2374/**
2375 * Debug Info: Record info about a dirty guest register.
2376 */
2377DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2378 uint8_t idxGstReg, uint8_t idxHstReg)
2379{
2380 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2381 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2382 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2383 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2384 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2385}
2386
2387
2388/**
2389 * Debug Info: Record info about a dirty guest register writeback operation.
2390 */
2391DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2392{
2393 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2394 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2395 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2396 pEntry->GuestRegWriteback.fGstReg = (uint32_t)fGstReg;
2397 /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */
2398 Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);
2399}
2400
2401# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2402
2403#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2404
2405
2406/*********************************************************************************************************************************
2407* Register Allocator *
2408*********************************************************************************************************************************/
2409
2410/**
2411 * Register parameter indexes (indexed by argument number).
2412 */
2413DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2414{
2415 IEMNATIVE_CALL_ARG0_GREG,
2416 IEMNATIVE_CALL_ARG1_GREG,
2417 IEMNATIVE_CALL_ARG2_GREG,
2418 IEMNATIVE_CALL_ARG3_GREG,
2419#if defined(IEMNATIVE_CALL_ARG4_GREG)
2420 IEMNATIVE_CALL_ARG4_GREG,
2421# if defined(IEMNATIVE_CALL_ARG5_GREG)
2422 IEMNATIVE_CALL_ARG5_GREG,
2423# if defined(IEMNATIVE_CALL_ARG6_GREG)
2424 IEMNATIVE_CALL_ARG6_GREG,
2425# if defined(IEMNATIVE_CALL_ARG7_GREG)
2426 IEMNATIVE_CALL_ARG7_GREG,
2427# endif
2428# endif
2429# endif
2430#endif
2431};
2432AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2433
2434/**
2435 * Call register masks indexed by argument count.
2436 */
2437DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2438{
2439 0,
2440 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2441 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2442 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2443 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2444 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2445#if defined(IEMNATIVE_CALL_ARG4_GREG)
2446 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2447 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2448# if defined(IEMNATIVE_CALL_ARG5_GREG)
2449 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2450 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2451# if defined(IEMNATIVE_CALL_ARG6_GREG)
2452 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2453 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2454 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2455# if defined(IEMNATIVE_CALL_ARG7_GREG)
2456 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2457 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2458 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2459# endif
2460# endif
2461# endif
2462#endif
2463};
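
/*
 * Illustrative sketch: how the two tables above are typically consulted when
 * setting up a helper call with three register arguments (the surrounding
 * code that frees and loads the registers is omitted).
 */
#if 0 /* illustrative sketch only */
    uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];   /* mask of the ARG0..ARG2 host registers */
    uint8_t  const idxHstArg2 = g_aidxIemNativeCallRegs[2]; /* host register carrying argument #2 */
#endif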
2464
2465#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2466/**
2467 * BP offset of the stack argument slots.
2468 *
2469 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2470 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2471 */
2472DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2473{
2474 IEMNATIVE_FP_OFF_STACK_ARG0,
2475# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2476 IEMNATIVE_FP_OFF_STACK_ARG1,
2477# endif
2478# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2479 IEMNATIVE_FP_OFF_STACK_ARG2,
2480# endif
2481# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2482 IEMNATIVE_FP_OFF_STACK_ARG3,
2483# endif
2484};
2485AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2486#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2487
2488/**
2489 * Info about shadowed guest register values.
2490 * @see IEMNATIVEGSTREG
2491 */
2492DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2493{
2494#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2495 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2496 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2497 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2498 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2499 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2500 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2501 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2502 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2503 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2504 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2505 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2506 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2507 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2508 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2509 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2510 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2511 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2512 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2513 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2514 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2515 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2516 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2517 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2518 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2519 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2520 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2521 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2522 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2523 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2524 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2525 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2526 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2527 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2528 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2529 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2530 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2531 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2532 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2533 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2534 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2535 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2536 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2537 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2538 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2539 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2540 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2541 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2542 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2543#undef CPUMCTX_OFF_AND_SIZE
2544};
2545AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2546
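/*
 * Illustrative sketch: the table above is what ties an IEMNATIVEGSTREG value
 * to its CPUMCTX field; the .off/.cb/.pszName members are the ones used by
 * the store and logging code further down.
 */
#if 0 /* illustrative sketch only */
    IEMNATIVEGSTREG const enmGstReg = kIemNativeGstReg_Pc;
    uint32_t const        offVCpu   = g_aGstShadowInfo[enmGstReg].off;  /* byte offset of rip within VMCPU */
    unsigned const        cbField   = g_aGstShadowInfo[enmGstReg].cb;   /* 8 bytes for rip */
    Log12(("%s lives at VMCPU+%#x, %u bytes\n", g_aGstShadowInfo[enmGstReg].pszName, offVCpu, cbField));
#endif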
2547
2548/** Host CPU general purpose register names. */
2549DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2550{
2551#ifdef RT_ARCH_AMD64
2552 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2553#elif RT_ARCH_ARM64
2554 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2555 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2556#else
2557# error "port me"
2558#endif
2559};
2560
2561
2562#if 0 /* unused */
2563/**
2564 * Tries to locate a suitable register in the given register mask.
2565 *
2566 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2567 * failed.
2568 *
2569 * @returns Host register number on success, returns UINT8_MAX on failure.
2570 */
2571static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2572{
2573 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2574 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2575 if (fRegs)
2576 {
2577 /** @todo pick better here: */
2578 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2579
2580 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2581 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2582 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2583 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2584
2585 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2586 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2587 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2588 return idxReg;
2589 }
2590 return UINT8_MAX;
2591}
2592#endif /* unused */
2593
2594
2595#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2596/**
2597 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2598 *
2599 * @returns New code buffer offset on success, UINT32_MAX on failure.
2600 * @param pReNative The native recompile state.
2601 * @param off The current code buffer position.
2602 * @param enmGstReg The guest register to store to.
2603 * @param idxHstReg The host register to store from.
2604 */
2605DECL_FORCE_INLINE_THROW(uint32_t)
2606iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2607{
2608 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2609 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2610
2611 switch (g_aGstShadowInfo[enmGstReg].cb)
2612 {
2613 case sizeof(uint64_t):
2614 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2615 case sizeof(uint32_t):
2616 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2617 case sizeof(uint16_t):
2618 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2619#if 0 /* not present in the table. */
2620 case sizeof(uint8_t):
2621 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2622#endif
2623 default:
2624 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2625 }
2626}
2627
2628
2629/**
2630 * Emits code to flush a pending write of the given guest register if any.
2631 *
2632 * @returns New code buffer offset.
2633 * @param pReNative The native recompile state.
2634 * @param off Current code buffer position.
2635 * @param enmGstReg The guest register to flush.
2636 */
2637DECL_HIDDEN_THROW(uint32_t)
2638iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2639{
2640 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2641
2642 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2643 && enmGstReg <= kIemNativeGstReg_GprLast)
2644 || enmGstReg == kIemNativeGstReg_MxCsr);
2645 Assert( idxHstReg != UINT8_MAX
2646 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2647 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2648 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2649
2650 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2651
2652 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2653 return off;
2654}
2655
2656
2657/**
2658 * Flush the given set of guest registers if marked as dirty.
2659 *
2660 * @returns New code buffer offset.
2661 * @param pReNative The native recompile state.
2662 * @param off Current code buffer position.
2663 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2664 */
2665DECL_HIDDEN_THROW(uint32_t)
2666iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2667{
2668 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2669 if (bmGstRegShadowDirty)
2670 {
2671# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2672 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2673 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2674# endif
2675 do
2676 {
2677 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2678 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2679 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2680 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2681 } while (bmGstRegShadowDirty);
2682 }
2683
2684 return off;
2685}
2686
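/*
 * Illustrative sketch: a caller that is about to access RAX and RSP directly
 * from CPUMCTX would first flush any dirty shadow copies like this (only
 * meaningful with IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK, same as the
 * function above).
 */
#if 0 /* illustrative sketch only */
    off = iemNativeRegFlushDirtyGuest(pReNative, off,
                                        RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX)
                                      | RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP));
#endif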
2687
2688/**
2689 * Flush all shadowed guest registers marked as dirty for the given host register.
2690 *
2691 * @returns New code buffer offset.
2692 * @param pReNative The native recompile state.
2693 * @param off Current code buffer position.
2694 * @param idxHstReg The host register.
2695 *
2696 * @note This doesn't do any unshadowing of guest registers from the host register.
2697 */
2698DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
2699{
2700 /* We need to flush any pending guest register writes this host register shadows. */
2701 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2702 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
2703 {
2704# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2705 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2706 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
2707# endif
2708 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
2709 * likely to only have a single bit set. It'll be in the 0..15 range,
2710 * but still it's 15 unnecessary loops for the last guest register. */
2711
2712 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
2713 do
2714 {
2715 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2716 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2717 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2718 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2719 } while (bmGstRegShadowDirty);
2720 }
2721
2722 return off;
2723}
2724#endif
2725
2726
2727/**
2728 * Locate a register, possibly freeing one up.
2729 *
2730 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2731 * failed.
2732 *
2733 * @returns Host register number on success. Returns UINT8_MAX if no registers
2734 * found, the caller is supposed to deal with this and raise an
2735 * allocation type specific status code (if desired).
2736 *
2737 * @throws VBox status code if we run into trouble spilling a variable or
2738 * recording debug info. Does NOT throw anything if we're out of
2739 * registers, though.
2740 */
2741static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2742 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2743{
2744 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
2745 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2746 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2747
2748 /*
2749 * Try a freed register that's shadowing a guest register.
2750 */
2751 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2752 if (fRegs)
2753 {
2754 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
2755
2756#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
2757 /*
2758 * When we have liveness information, we use it to kick out all shadowed
2759 * guest registers that will not be needed any more in this TB. If we're
2760 * lucky, this may prevent us from ending up here again.
2761 *
2762 * Note! We must consider the previous entry here so we don't free
2763 * anything that the current threaded function requires (current
2764 * entry is produced by the next threaded function).
2765 */
2766 uint32_t const idxCurCall = pReNative->idxCurCall;
2767 if (idxCurCall > 0)
2768 {
2769 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
2770
2771# ifndef IEMLIVENESS_EXTENDED_LAYOUT
2772 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
2773 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
2774 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL */
2775#else
2776 /* Construct a mask of the registers not in the read or write state.
2777 Note! We could skip writes, if they aren't from us, as this is just
2778 a hack to prevent trashing registers that have just been written
2779 or will be written when we retire the current instruction. */
2780 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
2781 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
2782 & IEMLIVENESSBIT_MASK;
2783#endif
2784 /* Merge EFLAGS. */
2785 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
2786 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
2787 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
2788 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
2789 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
2790
2791 /* If it matches any shadowed registers. */
2792 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
2793 {
2794#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2795 /* Writeback any dirty shadow registers we are about to unshadow. */
2796 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
2797#endif
2798
2799 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
2800 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
2801 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
2802
2803 /* See if we've got any unshadowed registers we can return now. */
2804 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
2805 if (fUnshadowedRegs)
2806 {
2807 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
2808 return (fPreferVolatile
2809 ? ASMBitFirstSetU32(fUnshadowedRegs)
2810 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2811 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
2812 - 1;
2813 }
2814 }
2815 }
2816#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
2817
2818 unsigned const idxReg = (fPreferVolatile
2819 ? ASMBitFirstSetU32(fRegs)
2820 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2821 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
2822 - 1;
2823
2824 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2825 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2826 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2827 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2828
2829#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2830 /* We need to flush any pending guest register writes this host register shadows. */
2831 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
2832#endif
2833
2834 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2835 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2836 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2837 return idxReg;
2838 }
2839
2840 /*
2841 * Try free up a variable that's in a register.
2842 *
2843 * We do two rounds here, first evacuating variables that don't need to be
2844 * saved on the stack, then in the second round moving things to the stack.
2845 */
2846 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
2847 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2848 {
2849 uint32_t fVars = pReNative->Core.bmVars;
2850 while (fVars)
2851 {
2852 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2853 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2854#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2855 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
2856 continue;
2857#endif
2858
2859 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2860 && (RT_BIT_32(idxReg) & fRegMask)
2861 && ( iLoop == 0
2862 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2863 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2864 && !pReNative->Core.aVars[idxVar].fRegAcquired)
2865 {
2866 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2867 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2868 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2869 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2870 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2871 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2872#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2873 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2874#endif
2875
2876 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2877 {
2878 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
2879 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2880 }
2881
2882 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2883 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2884
2885 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2886 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2887 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2888 return idxReg;
2889 }
2890 fVars &= ~RT_BIT_32(idxVar);
2891 }
2892 }
2893
2894 return UINT8_MAX;
2895}
2896
2897
2898/**
2899 * Reassigns a variable to a different register specified by the caller.
2900 *
2901 * @returns The new code buffer position.
2902 * @param pReNative The native recompile state.
2903 * @param off The current code buffer position.
2904 * @param idxVar The variable index.
2905 * @param idxRegOld The old host register number.
2906 * @param idxRegNew The new host register number.
2907 * @param pszCaller The caller for logging.
2908 */
2909static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2910 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2911{
2912 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2913 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
2914#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2915 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
2916#endif
2917 RT_NOREF(pszCaller);
2918
2919#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2920 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2921#endif
2922 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2923
2924 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2925#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2926 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2927#endif
2928 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
2929 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2930 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2931
2932 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2933 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2934 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2935 if (fGstRegShadows)
2936 {
2937 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
2938 | RT_BIT_32(idxRegNew);
2939 while (fGstRegShadows)
2940 {
2941 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2942 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2943
2944 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2945 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2946 }
2947 }
2948
2949 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
2950 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2951 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2952 return off;
2953}
2954
2955
2956/**
2957 * Moves a variable to a different register or spills it onto the stack.
2958 *
2959 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2960 * kinds can easily be recreated if needed later.
2961 *
2962 * @returns The new code buffer position.
2963 * @param pReNative The native recompile state.
2964 * @param off The current code buffer position.
2965 * @param idxVar The variable index.
2966 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2967 * call-volatile registers.
2968 */
2969DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2970 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
2971{
2972 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2973 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2974 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
2975 Assert(!pVar->fRegAcquired);
2976
2977 uint8_t const idxRegOld = pVar->idxReg;
2978 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2979 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2980 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2981 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2982 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2983 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2984 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2985 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2986#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2987 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2988#endif
2989
2990
2991 /** @todo Add statistics on this.*/
2992 /** @todo Implement basic variable liveness analysis (python) so variables
2993 * can be freed immediately once no longer used. Not doing this risks
2994 * trashing registers and stack for dead variables.
2995 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
2996
2997 /*
2998 * First try move it to a different register, as that's cheaper.
2999 */
3000 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3001 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3002 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3003 if (fRegs)
3004 {
3005 /* Avoid using shadow registers, if possible. */
3006 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3007 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3008 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3009 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3010 }
3011
3012 /*
3013 * Otherwise we must spill the register onto the stack.
3014 */
3015 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3016 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3017 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3018 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3019
3020 pVar->idxReg = UINT8_MAX;
3021 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3022 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3023 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3024 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3025 return off;
3026}
3027
3028
3029/**
3030 * Allocates a temporary host general purpose register.
3031 *
3032 * This may emit code to save register content onto the stack in order to free
3033 * up a register.
3034 *
3035 * @returns The host register number; throws VBox status code on failure,
3036 * so no need to check the return value.
3037 * @param pReNative The native recompile state.
3038 * @param poff Pointer to the variable with the code buffer position.
3039 * This will be updated if we need to move a variable from
3040 * register to stack in order to satisfy the request.
3041 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3042 * registers (@c true, default) or the other way around
3043 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3044 */
3045DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3046{
3047 /*
3048 * Try to find a completely unused register, preferably a call-volatile one.
3049 */
3050 uint8_t idxReg;
3051 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3052 & ~pReNative->Core.bmHstRegsWithGstShadow
3053 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3054 if (fRegs)
3055 {
3056 if (fPreferVolatile)
3057 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3058 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3059 else
3060 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3061 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3062 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3063 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3064 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3065 }
3066 else
3067 {
3068 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3069 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3070 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3071 }
3072 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3073}
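/*
 * Usage sketch (illustrative only, not taken verbatim from the recompiler):
 * a typical emitter grabs a scratch GPR, uses it and frees it again.  The
 * pReNative/off pair is the usual emitter context; the value loaded below is
 * just a placeholder.
 */
#if 0
    /* Inside some emitter function with pReNative and off in scope: */
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);   /* may emit spill code and update off */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, UINT64_C(0x1234));
    /* ... emit whatever needs the scratch register ... */
    iemNativeRegFreeTmp(pReNative, idxRegTmp);                         /* bookkeeping only, no code emitted */
#endif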
3074
3075
3076/**
3077 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3078 * registers.
3079 *
3080 * @returns The host register number; throws VBox status code on failure,
3081 * so no need to check the return value.
3082 * @param pReNative The native recompile state.
3083 * @param poff Pointer to the variable with the code buffer position.
3084 * This will be updated if we need to move a variable from
3085 * register to stack in order to satisfy the request.
3086 * @param fRegMask Mask of acceptable registers.
3087 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3088 * registers (@c true, default) or the other way around
3089 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3090 */
3091DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3092 bool fPreferVolatile /*= true*/)
3093{
3094 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3095 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3096
3097 /*
3098 * Try to find a completely unused register, preferably a call-volatile one.
3099 */
3100 uint8_t idxReg;
3101 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3102 & ~pReNative->Core.bmHstRegsWithGstShadow
3103 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3104 & fRegMask;
3105 if (fRegs)
3106 {
3107 if (fPreferVolatile)
3108 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3109 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3110 else
3111 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3112 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3113 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3114 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3115 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3116 }
3117 else
3118 {
3119 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3120 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3121 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3122 }
3123 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3124}
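/*
 * Usage sketch (illustrative only): restricting the allocation to the
 * non-volatile registers, e.g. when the value must survive a helper call.
 */
#if 0
    uint8_t const idxRegSaved = iemNativeRegAllocTmpEx(pReNative, &off,
                                                       IEMNATIVE_HST_GREG_MASK
                                                       & ~IEMNATIVE_REG_FIXED_MASK
                                                       & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
                                                       false /*fPreferVolatile*/);
    /* ... the register content survives a subsequent helper call ... */
    iemNativeRegFreeTmp(pReNative, idxRegSaved);
#endif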
3125
3126
3127/**
3128 * Allocates a temporary register for loading an immediate value into.
3129 *
3130 * This will emit code to load the immediate, unless there happens to be an
3131 * unused register with the value already loaded.
3132 *
3133 * The caller must not modify the returned register; it is to be considered
3134 * read-only. Free it using iemNativeRegFreeTmpImm().
3135 *
3136 * @returns The host register number; throws VBox status code on failure, so no
3137 * need to check the return value.
3138 * @param pReNative The native recompile state.
3139 * @param poff Pointer to the variable with the code buffer position.
3140 * @param uImm The immediate value that the register must hold upon
3141 * return.
3142 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3143 * registers (@c true, default) or the other way around
3144 * (@c false).
3145 *
3146 * @note Reusing immediate values has not been implemented yet.
3147 */
3148DECL_HIDDEN_THROW(uint8_t)
3149iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3150{
3151 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3152 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3153 return idxReg;
3154}
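/*
 * Usage sketch (illustrative only): loading a constant for use as a
 * read-only source operand; the immediate value is a placeholder.
 */
#if 0
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeef));
    /* ... use idxRegImm strictly as a read-only source operand ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm); /* assumes the value was left untouched */
#endif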
3155
3156
3157/**
3158 * Allocates a temporary host general purpose register for keeping a guest
3159 * register value.
3160 *
3161 * Since we may already have a register holding the guest register value,
3162 * code will be emitted to do the loading if that's not the case. Code may also
3163 * be emitted if we have to free up a register to satisfy the request.
3164 *
3165 * @returns The host register number; throws VBox status code on failure, so no
3166 * need to check the return value.
3167 * @param pReNative The native recompile state.
3168 * @param poff Pointer to the variable with the code buffer
3169 * position. This will be updated if we need to move a
3170 * variable from register to stack in order to satisfy
3171 * the request.
3172 * @param enmGstReg The guest register that is to be updated.
3173 * @param enmIntendedUse How the caller will be using the host register.
3174 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3175 * register is okay (default). The ASSUMPTION here is
3176 * that the caller has already flushed all volatile
3177 * registers, so this is only applied if we allocate a
3178 * new register.
3179 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3180 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3181 */
3182DECL_HIDDEN_THROW(uint8_t)
3183iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3184 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3185 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3186{
3187 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3188#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3189 AssertMsg( fSkipLivenessAssert
3190 || pReNative->idxCurCall == 0
3191 || enmGstReg == kIemNativeGstReg_Pc
3192 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3193 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3194 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3195 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3196 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3197 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3198#endif
3199 RT_NOREF(fSkipLivenessAssert);
3200#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3201 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3202#endif
3203 uint32_t const fRegMask = !fNoVolatileRegs
3204 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3205 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3206
3207 /*
3208 * First check if the guest register value is already in a host register.
3209 */
3210 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3211 {
3212 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3213 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3214 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3215 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3216
3217 /* It's not supposed to be allocated... */
3218 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3219 {
3220 /*
3221 * If the register will trash the guest shadow copy, try to find a
3222 * completely unused register we can use instead. If that fails,
3223 * we need to disassociate the host reg from the guest reg.
3224 */
3225 /** @todo would be nice to know if preserving the register is in any way helpful. */
3226 /* If the purpose is calculations, try to duplicate the register value as
3227 we'll be clobbering the shadow. */
3228 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3229 && ( ~pReNative->Core.bmHstRegs
3230 & ~pReNative->Core.bmHstRegsWithGstShadow
3231 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3232 {
3233 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3234
3235 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3236
3237 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3238 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3239 g_apszIemNativeHstRegNames[idxRegNew]));
3240 idxReg = idxRegNew;
3241 }
3242 /* If the current register matches the restrictions, go ahead and allocate
3243 it for the caller. */
3244 else if (fRegMask & RT_BIT_32(idxReg))
3245 {
3246 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3247 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3248 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3249 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3250 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3251 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3252 else
3253 {
3254 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3255 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3256 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3257 }
3258 }
3259 /* Otherwise, allocate a register that satisfies the caller and transfer
3260 the shadowing if compatible with the intended use. (This basically
3261 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3262 else
3263 {
3264 Assert(fNoVolatileRegs);
3265 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3266 !fNoVolatileRegs
3267 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3268 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3269 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3270 {
3271 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3272 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3273 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3274 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3275 }
3276 else
3277 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3278 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3279 g_apszIemNativeHstRegNames[idxRegNew]));
3280 idxReg = idxRegNew;
3281 }
3282 }
3283 else
3284 {
3285 /*
3286 * Oops. Shadowed guest register already allocated!
3287 *
3288 * Allocate a new register, copy the value and, if updating, the
3289 * guest shadow copy assignment to the new register.
3290 */
3291 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3292 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3293 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3294 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3295
3296 /** @todo share register for readonly access. */
3297 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3298 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3299
3300 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3301 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3302
3303 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3304 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3305 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3306 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3307 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3308 else
3309 {
3310 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3311 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3312 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3313 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3314 }
3315 idxReg = idxRegNew;
3316 }
3317 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3318
3319#ifdef VBOX_STRICT
3320 /* Strict builds: Check that the value is correct. */
3321 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3322#endif
3323
3324#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3325 /** @todo r=aeichner Implement for registers other than GPR as well. */
3326 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3327 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3328 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3329 && enmGstReg <= kIemNativeGstReg_GprLast)
3330 || enmGstReg == kIemNativeGstReg_MxCsr))
3331 {
3332# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3333 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3334 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3335# endif
3336 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3337 }
3338#endif
3339
3340 return idxReg;
3341 }
3342
3343 /*
3344 * Allocate a new register, load it with the guest value and designate it as a copy of the
3345 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3346 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3347
3348 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3349 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3350
3351 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3352 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3353 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3354 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3355
3356#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3357 /** @todo r=aeichner Implement for registers other than GPR as well. */
3358 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3359 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3360 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3361 && enmGstReg <= kIemNativeGstReg_GprLast)
3362 || enmGstReg == kIemNativeGstReg_MxCsr))
3363 {
3364# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3365 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3366 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3367# endif
3368 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3369 }
3370#endif
3371
3372 return idxRegNew;
3373}
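/*
 * Usage sketch (illustrative only): fetching the shadow copy of guest RAX
 * for updating.  The guest register index arithmetic follows the
 * kIemNativeGstReg_GprFirst convention used elsewhere in this file; the
 * emitter work in the middle is a stand-in.
 */
#if 0
    uint8_t const idxGstRegRax = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                                 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                                 kIemNativeGstRegUse_ForUpdate);
    /* ... emit code that modifies idxGstRegRax; with delayed writeback the
       dirty bookkeeping was already done by the allocator above ... */
    iemNativeRegFreeTmp(pReNative, idxGstRegRax);
#endif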
3374
3375
3376/**
3377 * Allocates a temporary host general purpose register that already holds the
3378 * given guest register value.
3379 *
3380 * The use case for this function is places where the shadowing state cannot be
3381 * modified due to branching and such. This will fail if we don't have a
3382 * current shadow copy handy or if it's incompatible. The only code that will
3383 * be emitted here is value checking code in strict builds.
3384 *
3385 * The intended use can only be readonly!
3386 *
3387 * @returns The host register number, UINT8_MAX if not present.
3388 * @param pReNative The native recompile state.
3389 * @param poff Pointer to the instruction buffer offset.
3390 * Will be updated in strict builds if a register is
3391 * found.
3392 * @param enmGstReg The guest register that is to be read.
3393 * @note In strict builds, this may throw instruction buffer growth failures.
3394 * Non-strict builds will not throw anything.
3395 * @sa iemNativeRegAllocTmpForGuestReg
3396 */
3397DECL_HIDDEN_THROW(uint8_t)
3398iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3399{
3400 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3401#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3402 AssertMsg( pReNative->idxCurCall == 0
3403 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3404 || enmGstReg == kIemNativeGstReg_Pc,
3405 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3406#endif
3407
3408 /*
3409 * First check if the guest register value is already in a host register.
3410 */
3411 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3412 {
3413 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3414 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3415 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3416 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3417
3418 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3419 {
3420 /*
3421 * We only do readonly use here, so easy compared to the other
3422 * variant of this code.
3423 */
3424 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3425 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3426 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3427 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3428 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3429
3430#ifdef VBOX_STRICT
3431 /* Strict builds: Check that the value is correct. */
3432 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3433#else
3434 RT_NOREF(poff);
3435#endif
3436 return idxReg;
3437 }
3438 }
3439
3440 return UINT8_MAX;
3441}
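/*
 * Usage sketch (illustrative only): opportunistic read-only access to a
 * shadow copy without disturbing the shadowing state, with a graceful
 * fallback when no copy is handy.
 */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... read-only use of the guest PC value ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    /* else: leave the shadowing state alone and take a slower path. */
#endif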
3442
3443
3444/**
3445 * Allocates argument registers for a function call.
3446 *
3447 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3448 * need to check the return value.
3449 * @param pReNative The native recompile state.
3450 * @param off The current code buffer offset.
3451 * @param cArgs The number of arguments the function call takes.
3452 */
3453DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3454{
3455 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3456 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3457 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3458 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3459
3460 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3461 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3462 else if (cArgs == 0)
3463        return off;
3464
3465 /*
3466 * Do we get lucky and all registers are free and not shadowing anything?
3467 */
3468 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3469 for (uint32_t i = 0; i < cArgs; i++)
3470 {
3471 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3472 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3473 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3474 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3475 }
3476 /*
3477 * Okay, not lucky so we have to free up the registers.
3478 */
3479 else
3480 for (uint32_t i = 0; i < cArgs; i++)
3481 {
3482 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3483 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3484 {
3485 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3486 {
3487 case kIemNativeWhat_Var:
3488 {
3489 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3491 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3492 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3493 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3494#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3495 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3496#endif
3497
3498 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3499 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3500 else
3501 {
3502 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3503 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3504 }
3505 break;
3506 }
3507
3508 case kIemNativeWhat_Tmp:
3509 case kIemNativeWhat_Arg:
3510 case kIemNativeWhat_rc:
3511 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3512 default:
3513 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3514 }
3515
3516 }
3517 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3518 {
3519 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3520 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3521 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3522#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3523 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3524#endif
3525 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3526 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3527 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3528 }
3529 else
3530 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3531 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3532 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3533 }
3534 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3535    return off;
3536}
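/*
 * Usage sketch (illustrative only): reserving the register arguments before
 * loading them for a call taking three arguments.
 */
#if 0
    off = iemNativeRegAllocArgs(pReNative, off, 3 /*cArgs*/);
    /* g_aidxIemNativeCallRegs[0..2] are now marked kIemNativeWhat_Arg and can
       be loaded with the actual argument values before emitting the call. */
#endif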
3537
3538
3539DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3540
3541
3542#if 0
3543/**
3544 * Frees a register assignment of any type.
3545 *
3546 * @param pReNative The native recompile state.
3547 * @param idxHstReg The register to free.
3548 *
3549 * @note Does not update variables.
3550 */
3551DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3552{
3553 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3554 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3555 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3556 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3557 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3558 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3559 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3560 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3561 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3562 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3563 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3564 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3565 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3566 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3567
3568 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3569 /* no flushing, right:
3570 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3571 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3572 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3573 */
3574}
3575#endif
3576
3577
3578/**
3579 * Frees a temporary register.
3580 *
3581 * Any shadow copies of guest registers assigned to the host register will not
3582 * be flushed by this operation.
3583 */
3584DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3585{
3586 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3587 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3588 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3589 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3590 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3591}
3592
3593
3594/**
3595 * Frees a temporary immediate register.
3596 *
3597 * It is assumed that the caller has not modified the register, so it still holds
3598 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3599 */
3600DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3601{
3602 iemNativeRegFreeTmp(pReNative, idxHstReg);
3603}
3604
3605
3606/**
3607 * Frees a register assigned to a variable.
3608 *
3609 * The register will be disassociated from the variable.
3610 */
3611DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3612{
3613 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3614 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3615 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3616 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3617 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3618#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3619 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3620#endif
3621
3622 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3623 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3624 if (!fFlushShadows)
3625 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3626 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3627 else
3628 {
3629 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3630 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3631#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3632 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3633#endif
3634 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3635 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3636 uint64_t fGstRegShadows = fGstRegShadowsOld;
3637 while (fGstRegShadows)
3638 {
3639 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3640 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3641
3642 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3643 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3644 }
3645 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3646 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3647 }
3648}
3649
3650
3651#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3652# ifdef LOG_ENABLED
3653/** Host CPU SIMD register names. */
3654DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3655{
3656# ifdef RT_ARCH_AMD64
3657 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3658# elif RT_ARCH_ARM64
3659 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3660 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3661# else
3662# error "port me"
3663# endif
3664};
3665# endif
3666
3667
3668/**
3669 * Frees a SIMD register assigned to a variable.
3670 *
3671 * The register will be disassociated from the variable.
3672 */
3673DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3674{
3675 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3676 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3677 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3679 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3680 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3681
3682 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3683 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3684 if (!fFlushShadows)
3685 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3686 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3687 else
3688 {
3689 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3690 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3691 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
3692 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
3693 uint64_t fGstRegShadows = fGstRegShadowsOld;
3694 while (fGstRegShadows)
3695 {
3696 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3697 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3698
3699 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
3700 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
3701 }
3702 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3703 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3704 }
3705}
3706
3707
3708/**
3709 * Reassigns a variable to a different SIMD register specified by the caller.
3710 *
3711 * @returns The new code buffer position.
3712 * @param pReNative The native recompile state.
3713 * @param off The current code buffer position.
3714 * @param idxVar The variable index.
3715 * @param idxRegOld The old host register number.
3716 * @param idxRegNew The new host register number.
3717 * @param pszCaller The caller for logging.
3718 */
3719static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3720 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3721{
3722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3723 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3724 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3725 RT_NOREF(pszCaller);
3726
3727 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3728 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
3729 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
3730
3731 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3732 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3733 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3734
3735 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3736 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
3738
3739 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
3740 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
3741 else
3742 {
3743 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
3744 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
3745 }
3746
3747 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3748 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3749 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
3750 if (fGstRegShadows)
3751 {
3752 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3753 | RT_BIT_32(idxRegNew);
3754 while (fGstRegShadows)
3755 {
3756 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3757 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3758
3759 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
3760 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
3761 }
3762 }
3763
3764 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3765 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3766 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
3767 return off;
3768}
3769
3770
3771/**
3772 * Moves a variable to a different register or spills it onto the stack.
3773 *
3774 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3775 * kinds can easily be recreated if needed later.
3776 *
3777 * @returns The new code buffer position.
3778 * @param pReNative The native recompile state.
3779 * @param off The current code buffer position.
3780 * @param idxVar The variable index.
3781 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3782 * call-volatile registers.
3783 */
3784DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3785 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
3786{
3787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3788 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3789 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3790 Assert(!pVar->fRegAcquired);
3791 Assert(!pVar->fSimdReg);
3792
3793 uint8_t const idxRegOld = pVar->idxReg;
3794 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
3795 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
3796 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3797 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
3798 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
3799 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3800 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
3801 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3802 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3803 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3804
3805 /** @todo Add statistics on this.*/
3806 /** @todo Implement basic variable liveness analysis (python) so variables
3807 * can be freed immediately once they are no longer used. Otherwise we risk
3808 * trashing registers and stack slots for dead variables.
3809 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3810
3811 /*
3812 * First try to move it to a different register, as that's cheaper.
3813 */
3814 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3815 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
3816 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
3817 if (fRegs)
3818 {
3819 /* Avoid using shadow registers, if possible. */
3820 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
3821 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
3822 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3823 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
3824 }
3825
3826 /*
3827 * Otherwise we must spill the register onto the stack.
3828 */
3829 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3830 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3831 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3832
3833 if (pVar->cbVar == sizeof(RTUINT128U))
3834 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3835 else
3836 {
3837 Assert(pVar->cbVar == sizeof(RTUINT256U));
3838 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3839 }
3840
3841 pVar->idxReg = UINT8_MAX;
3842 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3843 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
3844 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3845 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3846 return off;
3847}
3848
3849
3850/**
3851 * Called right before emitting a call instruction to move anything important
3852 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
3853 * optionally freeing argument variables.
3854 *
3855 * @returns New code buffer offset, UINT32_MAX on failure.
3856 * @param pReNative The native recompile state.
3857 * @param off The code buffer offset.
3858 * @param cArgs The number of arguments the function call takes.
3859 * It is presumed that the host register part of these has
3860 * been allocated as such already and won't need moving,
3861 * just freeing.
3862 * @param fKeepVars Mask of variables that should keep their register
3863 * assignments. Caller must take care to handle these.
3864 */
3865DECL_HIDDEN_THROW(uint32_t)
3866iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
3867{
3868 Assert(!cArgs); RT_NOREF(cArgs);
3869
3870 /* fKeepVars will reduce this mask. */
3871 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
3872
3873 /*
3874 * Move anything important out of volatile registers.
3875 */
3876 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
3877#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3878 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
3879#endif
3880 ;
3881
3882 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
3883 if (!fSimdRegsToMove)
3884 { /* likely */ }
3885 else
3886 {
3887 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
3888 while (fSimdRegsToMove != 0)
3889 {
3890 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
3891 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
3892
3893 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
3894 {
3895 case kIemNativeWhat_Var:
3896 {
3897                    uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3900 Assert(pVar->idxReg == idxSimdReg);
3901 Assert(pVar->fSimdReg);
3902 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
3903 {
3904 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
3905 idxVar, pVar->enmKind, pVar->idxReg));
3906 if (pVar->enmKind != kIemNativeVarKind_Stack)
3907 pVar->idxReg = UINT8_MAX;
3908 else
3909 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
3910 }
3911 else
3912 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
3913 continue;
3914 }
3915
3916 case kIemNativeWhat_Arg:
3917 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
3918 continue;
3919
3920 case kIemNativeWhat_rc:
3921 case kIemNativeWhat_Tmp:
3922 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
3923 continue;
3924
3925 case kIemNativeWhat_FixedReserved:
3926#ifdef RT_ARCH_ARM64
3927 continue; /* On ARM the upper half of the virtual 256-bit register. */
3928#endif
3929
3930 case kIemNativeWhat_FixedTmp:
3931 case kIemNativeWhat_pVCpuFixed:
3932 case kIemNativeWhat_pCtxFixed:
3933 case kIemNativeWhat_PcShadow:
3934 case kIemNativeWhat_Invalid:
3935 case kIemNativeWhat_End:
3936 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3937 }
3938 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3939 }
3940 }
3941
3942 /*
3943 * Do the actual freeing.
3944 */
3945 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
3946 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
3947 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
3948 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
3949
3950 /* If there are guest register shadows in any call-volatile register, we
3951 have to clear the corresponding guest register masks for each register. */
3952 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
3953 if (fHstSimdRegsWithGstShadow)
3954 {
3955 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3956 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
3957 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
3958 do
3959 {
3960 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
3961 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
3962
3963 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
3964
3965#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3966 /*
3967 * Flush any pending writes now (they might have been skipped earlier in iemEmitCallCommon(),
3968 * but that skipping doesn't apply to call-volatile registers).
3969 */
3970 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3971 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
3972 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
3973#endif
3974 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3975 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
3976
3977 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
3978 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
3979 } while (fHstSimdRegsWithGstShadow != 0);
3980 }
3981
3982 return off;
3983}
3984#endif
3985
3986
3987/**
3988 * Called right before emitting a call instruction to move anything important
3989 * out of call-volatile registers, free and flush the call-volatile registers,
3990 * optionally freeing argument variables.
3991 *
3992 * @returns New code buffer offset, UINT32_MAX on failure.
3993 * @param pReNative The native recompile state.
3994 * @param off The code buffer offset.
3995 * @param cArgs The number of arguments the function call takes.
3996 * It is presumed that the host register part of these has
3997 * been allocated as such already and won't need moving,
3998 * just freeing.
3999 * @param fKeepVars Mask of variables that should keep their register
4000 * assignments. Caller must take care to handle these.
4001 */
4002DECL_HIDDEN_THROW(uint32_t)
4003iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4004{
4005 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4006
4007 /* fKeepVars will reduce this mask. */
4008 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4009
4010 /*
4011 * Move anything important out of volatile registers.
4012 */
4013 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4014 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4015 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4016#ifdef IEMNATIVE_REG_FIXED_TMP0
4017 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4018#endif
4019#ifdef IEMNATIVE_REG_FIXED_TMP1
4020 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4021#endif
4022#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4023 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4024#endif
4025 & ~g_afIemNativeCallRegs[cArgs];
4026
4027 fRegsToMove &= pReNative->Core.bmHstRegs;
4028 if (!fRegsToMove)
4029 { /* likely */ }
4030 else
4031 {
4032 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4033 while (fRegsToMove != 0)
4034 {
4035 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4036 fRegsToMove &= ~RT_BIT_32(idxReg);
4037
4038 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4039 {
4040 case kIemNativeWhat_Var:
4041 {
4042 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4043 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4044 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4045 Assert(pVar->idxReg == idxReg);
4046#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4047 Assert(!pVar->fSimdReg);
4048#endif
4049 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4050 {
4051 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4052 idxVar, pVar->enmKind, pVar->idxReg));
4053 if (pVar->enmKind != kIemNativeVarKind_Stack)
4054 pVar->idxReg = UINT8_MAX;
4055 else
4056 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4057 }
4058 else
4059 fRegsToFree &= ~RT_BIT_32(idxReg);
4060 continue;
4061 }
4062
4063 case kIemNativeWhat_Arg:
4064 AssertMsgFailed(("What?!?: %u\n", idxReg));
4065 continue;
4066
4067 case kIemNativeWhat_rc:
4068 case kIemNativeWhat_Tmp:
4069 AssertMsgFailed(("Missing free: %u\n", idxReg));
4070 continue;
4071
4072 case kIemNativeWhat_FixedTmp:
4073 case kIemNativeWhat_pVCpuFixed:
4074 case kIemNativeWhat_pCtxFixed:
4075 case kIemNativeWhat_PcShadow:
4076 case kIemNativeWhat_FixedReserved:
4077 case kIemNativeWhat_Invalid:
4078 case kIemNativeWhat_End:
4079 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4080 }
4081 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4082 }
4083 }
4084
4085 /*
4086 * Do the actual freeing.
4087 */
4088 if (pReNative->Core.bmHstRegs & fRegsToFree)
4089 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4090 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4091 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4092
4093 /* If there are guest register shadows in any call-volatile register, we
4094 have to clear the corresponding guest register masks for each register. */
4095 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4096 if (fHstRegsWithGstShadow)
4097 {
4098 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4099 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4100 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4101 do
4102 {
4103 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4104 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4105
4106 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4107
4108#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4109 /*
4110 * Flush any pending writes now (they might have been skipped earlier in iemEmitCallCommon(),
4111 * but that skipping doesn't apply to call-volatile registers).
4112 */
4113 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4114 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4115 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4116#endif
4117
4118 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4119 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4120 } while (fHstRegsWithGstShadow != 0);
4121 }
4122
4123#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4124 /* Now for the SIMD registers, no argument support for now. */
4125 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4126#endif
4127
4128 return off;
4129}
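/*
 * Usage sketch (illustrative only): the usual sequence around a helper call.
 * The argument loading and the call emission are represented by comments; the
 * exact emitters vary with the call being made.
 */
#if 0
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
    /* ... load IEMNATIVE_CALL_ARG0_GREG / IEMNATIVE_CALL_ARG1_GREG and emit the call ... */
    /* Afterwards no call-volatile register shadows a guest register, so any
       guest value needed again must be reloaded from CPUMCTX. */
#endif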
4130
4131
4132/**
4133 * Flushes a set of guest register shadow copies.
4134 *
4135 * This is usually done after calling a threaded function or a C-implementation
4136 * of an instruction.
4137 *
4138 * @param pReNative The native recompile state.
4139 * @param fGstRegs Set of guest registers to flush.
4140 */
4141DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4142{
4143 /*
4144 * Reduce the mask by what's currently shadowed
4145 */
4146 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4147 fGstRegs &= bmGstRegShadowsOld;
4148 if (fGstRegs)
4149 {
4150 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4151 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4152 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4153 if (bmGstRegShadowsNew)
4154 {
4155 /*
4156 * Partial.
4157 */
4158 do
4159 {
4160 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4161 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4162 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4163 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4164 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4165#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4166 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4167#endif
4168
4169 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4170 fGstRegs &= ~fInThisHstReg;
4171 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4172 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4173 if (!fGstRegShadowsNew)
4174 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4175 } while (fGstRegs != 0);
4176 }
4177 else
4178 {
4179 /*
4180 * Clear all.
4181 */
4182 do
4183 {
4184 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4185 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4186 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4187 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4188 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4189#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4190 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4191#endif
4192
4193 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4194 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4195 } while (fGstRegs != 0);
4196 pReNative->Core.bmHstRegsWithGstShadow = 0;
4197 }
4198 }
4199}
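/*
 * Usage sketch (illustrative only): after a threaded function or C
 * implementation may have changed guest state, the affected shadow copies are
 * dropped so later code reloads them from CPUMCTX.  The mask below is made up
 * for illustration.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative,
                                    RT_BIT_64(kIemNativeGstReg_Pc)
                                  | RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX));
#endif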
4200
4201
4202/**
4203 * Flushes guest register shadow copies held by a set of host registers.
4204 *
4205 * This is used with the TLB lookup code for ensuring that we don't carry on
4206 * with any guest shadows in volatile registers, as these will get corrupted by
4207 * a TLB miss.
4208 *
4209 * @param pReNative The native recompile state.
4210 * @param fHstRegs Set of host registers to flush guest shadows for.
4211 */
4212DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4213{
4214 /*
4215 * Reduce the mask by what's currently shadowed.
4216 */
4217 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4218 fHstRegs &= bmHstRegsWithGstShadowOld;
4219 if (fHstRegs)
4220 {
4221 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4222 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4223 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4224 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4225 if (bmHstRegsWithGstShadowNew)
4226 {
4227 /*
4228 * Partial (likely).
4229 */
4230 uint64_t fGstShadows = 0;
4231 do
4232 {
4233 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4234 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4235 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4236 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4237#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4238 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4239#endif
4240
4241 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4242 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4243 fHstRegs &= ~RT_BIT_32(idxHstReg);
4244 } while (fHstRegs != 0);
4245 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4246 }
4247 else
4248 {
4249 /*
4250 * Clear all.
4251 */
4252 do
4253 {
4254 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4255 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4258#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4259 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4260#endif
4261
4262 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4263 fHstRegs &= ~RT_BIT_32(idxHstReg);
4264 } while (fHstRegs != 0);
4265 pReNative->Core.bmGstRegShadows = 0;
4266 }
4267 }
4268}
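/*
 * Usage sketch (illustrative only): before branching into a TLB-miss path
 * that will clobber the call-volatile registers, drop any guest shadows they
 * hold so the recompiler doesn't rely on stale copies afterwards.
 */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif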
4269
4270
4271/**
4272 * Restores guest shadow copies in volatile registers.
4273 *
4274 * This is used after calling a helper function (think TLB miss) to restore the
4275 * register state of volatile registers.
4276 *
4277 * @param pReNative The native recompile state.
4278 * @param off The code buffer offset.
4279 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4280 * be active (allocated) w/o asserting. Hack.
4281 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4282 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4283 */
4284DECL_HIDDEN_THROW(uint32_t)
4285iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4286{
4287 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4288 if (fHstRegs)
4289 {
4290 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4291 do
4292 {
4293 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4294
4295 /* It's not fatal if a register is active holding a variable that
4296 shadows a guest register, ASSUMING all pending guest register
4297 writes were flushed prior to the helper call. However, we'll be
4298 emitting duplicate restores, so it wastes code space. */
4299 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4300 RT_NOREF(fHstRegsActiveShadows);
4301
4302 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4303#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4304 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4305#endif
4306 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4307 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4308 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4309
4310 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4311 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4312
4313 fHstRegs &= ~RT_BIT_32(idxHstReg);
4314 } while (fHstRegs != 0);
4315 }
4316 return off;
4317}
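/*
 * Illustrative sketch (assumed usage, not compiled): after a helper call that
 * clobbered the call-volatile registers but where the shadow bookkeeping was
 * kept (see the save/restore helpers referenced above), the guest values
 * would be reloaded along these lines.
 */
#if 0 /* illustration only */
    /* ... helper call emitted here ... */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif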
4318
4319
4320
4321
4322/*********************************************************************************************************************************
4323* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4324*********************************************************************************************************************************/
4325#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4326
4327/**
4328 * Info about shadowed guest SIMD register values.
4329 * @see IEMNATIVEGSTSIMDREG
4330 */
4331static struct
4332{
4333 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4334 uint32_t offXmm;
4335 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4336 uint32_t offYmm;
4337 /** Name (for logging). */
4338 const char *pszName;
4339} const g_aGstSimdShadowInfo[] =
4340{
4341#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4342 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4343 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4344 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4345 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4346 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4347 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4348 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4349 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4350 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4351 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4352 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4353 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4354 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4355 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4356 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4357 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4358 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4359#undef CPUMCTX_OFF_AND_SIZE
4360};
4361AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
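/*
 * Illustrative sketch (assumption, not compiled): the two offsets in each
 * entry are what the SIMD load/store emitters use to address the XMM (low)
 * and YmmHi (high) halves of a guest register inside VMCPU, e.g. inside some
 * emitter function:
 */
#if 0 /* illustration only */
    uint32_t const offLoYmm3 = g_aGstSimdShadowInfo[kIemNativeGstSimdReg_SimdRegFirst + 3].offXmm;
    uint32_t const offHiYmm3 = g_aGstSimdShadowInfo[kIemNativeGstSimdReg_SimdRegFirst + 3].offYmm;
#endif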
4362
4363
4364/**
4365 * Frees a temporary SIMD register.
4366 *
4367 * Any shadow copies of guest registers assigned to the host register will not
4368 * be flushed by this operation.
4369 */
4370DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4371{
4372 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4373 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4374 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4375 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4376 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4377}
4378
4379
4380/**
4381 * Emits code to flush a pending write of the given SIMD register, if any; also flushes the guest to host SIMD register association.
4382 *
4383 * @returns New code buffer offset.
4384 * @param pReNative The native recompile state.
4385 * @param off Current code buffer position.
4386 * @param enmGstSimdReg The guest SIMD register to flush.
4387 */
4388DECL_HIDDEN_THROW(uint32_t)
4389iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4390{
4391 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4392
4393 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4394 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4395 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4396 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4397
4398 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4399 {
4400 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4401 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4402 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4403 }
4404
4405 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4406 {
4407 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4408 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4409 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4410 }
4411
4412 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4413 return off;
4414}
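/*
 * Illustrative sketch (assumed usage, not compiled): flushing a single guest
 * SIMD register (ymm0 here) that is marked dirty, before its host copy gets
 * repurposed, would look roughly like this.
 */
#if 0 /* illustration only */
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif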
4415
4416
4417/**
4418 * Flush the given set of guest SIMD registers if marked as dirty.
4419 *
4420 * @returns New code buffer offset.
4421 * @param pReNative The native recompile state.
4422 * @param off Current code buffer position.
4423 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4424 */
4425DECL_HIDDEN_THROW(uint32_t)
4426iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4427{
4428 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4429 & fFlushGstSimdReg;
4430 if (bmGstSimdRegShadowDirty)
4431 {
4432# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4433 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4434 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4435# endif
4436
4437 do
4438 {
4439 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4440 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4441 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4442 } while (bmGstSimdRegShadowDirty);
4443 }
4444
4445 return off;
4446}
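/*
 * Illustrative sketch (assumed usage, not compiled): callers can either flush
 * every dirty guest SIMD register or restrict the flush to a subset via the
 * mask parameter, e.g. just ymm1.
 */
#if 0 /* illustration only */
    off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);                               /* everything dirty */
    off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1)));   /* just ymm1 */
#endif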
4447
4448
4449#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4450/**
4451 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4452 *
4453 * @returns New code buffer offset.
4454 * @param pReNative The native recompile state.
4455 * @param off Current code buffer position.
4456 * @param idxHstSimdReg The host SIMD register.
4457 *
4458 * @note This doesn't do any unshadowing of guest registers from the host register.
4459 */
4460DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4461{
4462 /* We need to flush any pending guest register writes this host register shadows. */
4463 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4464 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4465 if (bmGstSimdRegShadowDirty)
4466 {
4467# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4468 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4469 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4470# endif
4471
4472 do
4473 {
4474 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4475 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4476 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4477 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4478 } while (bmGstSimdRegShadowDirty);
4479 }
4480
4481 return off;
4482}
4483#endif
4484
4485
4486/**
4487 * Locate a register, possibly freeing one up.
4488 *
4489 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4490 * failed.
4491 *
4492 * @returns Host register number on success. Returns UINT8_MAX if no registers
4493 * were found; the caller is supposed to deal with this and raise an
4494 * allocation type specific status code (if desired).
4495 *
4496 * @throws VBox status code if we run into trouble spilling a variable or
4497 * recording debug info. Does NOT throw anything if we're out of
4498 * registers, though.
4499 */
4500static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4501 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4502{
4503 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4504 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4505 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4506
4507 /*
4508 * Try a freed register that's shadowing a guest register.
4509 */
4510 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4511 if (fRegs)
4512 {
4513 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4514
4515#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4516 /*
4517 * When we have liveness information, we use it to kick out all shadowed
4518 * guest registers that will not be needed any more in this TB. If we're
4519 * lucky, this may prevent us from ending up here again.
4520 *
4521 * Note! We must consider the previous entry here so we don't free
4522 * anything that the current threaded function requires (current
4523 * entry is produced by the next threaded function).
4524 */
4525 uint32_t const idxCurCall = pReNative->idxCurCall;
4526 if (idxCurCall > 0)
4527 {
4528 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4529
4530# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4531 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4532 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4533 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4534#else
4535 /* Construct a mask of the registers not in the read or write state.
4536 Note! We could skip writes, if they aren't from us, as this is just
4537 a hack to prevent trashing registers that have just been written
4538 or will be written when we retire the current instruction. */
4539 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4540 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4541 & IEMLIVENESSBIT_MASK;
4542#endif
4543 /* If it matches any shadowed registers. */
4544 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4545 {
4546 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4547 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4548 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4549
4550 /* See if we've got any unshadowed registers we can return now. */
4551 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4552 if (fUnshadowedRegs)
4553 {
4554 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4555 return (fPreferVolatile
4556 ? ASMBitFirstSetU32(fUnshadowedRegs)
4557 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4558 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4559 - 1;
4560 }
4561 }
4562 }
4563#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4564
4565 unsigned const idxReg = (fPreferVolatile
4566 ? ASMBitFirstSetU32(fRegs)
4567 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4568 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4569 - 1;
4570
4571 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4572 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4573 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4574 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4575
4576 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4577 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4578
4579 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4580 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4581 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4582 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4583 return idxReg;
4584 }
4585
4586 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4587
4588 /*
4589 * Try free up a variable that's in a register.
4590 *
4591 * We do two rounds here, first evacuating variables that don't need to be
4592 * saved on the stack, then in the second round moving things to the stack.
4593 */
4594 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4595 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4596 {
4597 uint32_t fVars = pReNative->Core.bmVars;
4598 while (fVars)
4599 {
4600 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4601 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4602 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
4603 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit so the loop advances past this variable. */
4604
4605 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4606 && (RT_BIT_32(idxReg) & fRegMask)
4607 && ( iLoop == 0
4608 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4609 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4610 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4611 {
4612 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4613 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4614 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4615 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4616 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4617 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4618
4619 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4620 {
4621 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4622 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4623 }
4624
4625 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4626 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4627
4628 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4629 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4630 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4631 return idxReg;
4632 }
4633 fVars &= ~RT_BIT_32(idxVar);
4634 }
4635 }
4636
4637 AssertFailed();
4638 return UINT8_MAX;
4639}
4640
4641
4642/**
4643 * Flushes a set of guest register shadow copies.
4644 *
4645 * This is usually done after calling a threaded function or a C-implementation
4646 * of an instruction.
4647 *
4648 * @param pReNative The native recompile state.
4649 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4650 */
4651DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4652{
4653 /*
4654 * Reduce the mask by what's currently shadowed
4655 */
4656 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4657 fGstSimdRegs &= bmGstSimdRegShadows;
4658 if (fGstSimdRegs)
4659 {
4660 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4661 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4662 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4663 if (bmGstSimdRegShadowsNew)
4664 {
4665 /*
4666 * Partial.
4667 */
4668 do
4669 {
4670 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4671 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4672 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4673 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4674 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4675 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4676
4677 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4678 fGstSimdRegs &= ~fInThisHstReg;
4679 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4680 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4681 if (!fGstRegShadowsNew)
4682 {
4683 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4684 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4685 }
4686 } while (fGstSimdRegs != 0);
4687 }
4688 else
4689 {
4690 /*
4691 * Clear all.
4692 */
4693 do
4694 {
4695 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4696 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4697 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4698 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4699 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4700 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4701
4702 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4703 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4704 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4705 } while (fGstSimdRegs != 0);
4706 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
4707 }
4708 }
4709}
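/*
 * Illustrative sketch (assumed usage, not compiled): after calling a
 * C-implementation helper that may have modified guest state behind our back,
 * all guest SIMD shadow associations would be dropped like this.
 */
#if 0 /* illustration only */
    iemNativeSimdRegFlushGuestShadows(pReNative, UINT64_MAX);
#endif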
4710
4711
4712/**
4713 * Allocates a temporary host SIMD register.
4714 *
4715 * This may emit code to save register content onto the stack in order to free
4716 * up a register.
4717 *
4718 * @returns The host register number; throws VBox status code on failure,
4719 * so no need to check the return value.
4720 * @param pReNative The native recompile state.
4721 * @param poff Pointer to the variable with the code buffer position.
4722 * This will be updated if we need to move a variable from
4723 * register to stack in order to satisfy the request.
4724 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4725 * registers (@c true, default) or the other way around
4726 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4727 */
4728DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4729{
4730 /*
4731 * Try find a completely unused register, preferably a call-volatile one.
4732 */
4733 uint8_t idxSimdReg;
4734 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4735 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4736 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
4737 if (fRegs)
4738 {
4739 if (fPreferVolatile)
4740 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4741 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4742 else
4743 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4744 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4745 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4746 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4747
4748 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4749 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4750 }
4751 else
4752 {
4753 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
4754 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4755 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4756 }
4757
4758 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4759 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4760}
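/*
 * Illustrative sketch (assumed usage, not compiled): a scratch SIMD register
 * is bracketed by an allocation and a matching free.
 */
#if 0 /* illustration only */
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    /* ... emit SIMD code using idxSimdTmp ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
#endif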
4761
4762
4763/**
4764 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
4765 * registers.
4766 *
4767 * @returns The host register number; throws VBox status code on failure,
4768 * so no need to check the return value.
4769 * @param pReNative The native recompile state.
4770 * @param poff Pointer to the variable with the code buffer position.
4771 * This will be updated if we need to move a variable from
4772 * register to stack in order to satisfy the request.
4773 * @param fRegMask Mask of acceptable registers.
4774 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4775 * registers (@c true, default) or the other way around
4776 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4777 */
4778DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4779 bool fPreferVolatile /*= true*/)
4780{
4781 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4782 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4783
4784 /*
4785 * Try find a completely unused register, preferably a call-volatile one.
4786 */
4787 uint8_t idxSimdReg;
4788 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4789 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4790 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
4791 & fRegMask;
4792 if (fRegs)
4793 {
4794 if (fPreferVolatile)
4795 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4796 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4797 else
4798 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4799 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4800 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4801 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4802
4803 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4804 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4805 }
4806 else
4807 {
4808 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4809 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4810 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4811 }
4812
4813 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4814 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4815}
4816
4817
4818/**
4819 * Sets the indicator for which part of the given SIMD register has valid data loaded.
4820 *
4821 * @param pReNative The native recompile state.
4822 * @param idxHstSimdReg The host SIMD register to update the state for.
4823 * @param enmLoadSz The load size to set.
4824 */
4825DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
4826 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
4827{
4828 /* Everything valid already? -> nothing to do. */
4829 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4830 return;
4831
4832 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
4833 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
4834 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
4835 {
4836 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
4837 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
4838 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
4839 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
4840 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
4841 }
4842}
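/*
 * Summary of the load-state transitions implemented above (derived from the
 * code, for illustration):
 *      Invalid + Low128   -> Low128
 *      Invalid + High128  -> High128
 *      Low128  + High128  -> 256
 *      High128 + Low128   -> 256
 *      256     + anything -> 256 (nothing to do)
 */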
4843
4844
4845static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
4846 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
4847{
4848 /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
4849 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
4850 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4851 {
4852# ifdef RT_ARCH_ARM64
4853 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
4854 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
4855# endif
4856
4857 if (idxHstSimdRegDst != idxHstSimdRegSrc)
4858 {
4859 switch (enmLoadSzDst)
4860 {
4861 case kIemNativeGstSimdRegLdStSz_256:
4862 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4863 break;
4864 case kIemNativeGstSimdRegLdStSz_Low128:
4865 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4866 break;
4867 case kIemNativeGstSimdRegLdStSz_High128:
4868 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4869 break;
4870 default:
4871 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4872 }
4873
4874 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
4875 }
4876 }
4877 else
4878 {
4879 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
4880 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
4881 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
4882 }
4883
4884 return off;
4885}
4886
4887
4888/**
4889 * Allocates a temporary host SIMD register for keeping a guest
4890 * SIMD register value.
4891 *
4892 * Since we may already have a register holding the guest register value,
4893 * code will be emitted to do the loading if that's not the case. Code may also
4894 * be emitted if we have to free up a register to satisfy the request.
4895 *
4896 * @returns The host register number; throws VBox status code on failure, so no
4897 * need to check the return value.
4898 * @param pReNative The native recompile state.
4899 * @param poff Pointer to the variable with the code buffer
4900 * position. This will be updated if we need to move a
4901 * variable from register to stack in order to satisfy
4902 * the request.
4903 * @param enmGstSimdReg The guest SIMD register that is to be updated.
4904 * @param enmIntendedUse How the caller will be using the host register.
4905 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4906 * register is okay (default). The ASSUMPTION here is
4907 * that the caller has already flushed all volatile
4908 * registers, so this is only applied if we allocate a
4909 * new register.
4910 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4911 */
4912DECL_HIDDEN_THROW(uint8_t)
4913iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
4914 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4915 bool fNoVolatileRegs /*= false*/)
4916{
4917 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
4918#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
4919 AssertMsg( pReNative->idxCurCall == 0
4920 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4921 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4922 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4923 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4924 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
4925 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
4926#endif
4927#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4928 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4929#endif
4930 uint32_t const fRegMask = !fNoVolatileRegs
4931 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
4932 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4933
4934 /*
4935 * First check if the guest register value is already in a host register.
4936 */
4937 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
4938 {
4939 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4940 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4941 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
4942 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
4943
4944 /* It's not supposed to be allocated... */
4945 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
4946 {
4947 /*
4948 * If the register will trash the guest shadow copy, try to find a
4949 * completely unused register we can use instead. If that fails,
4950 * we need to disassociate the host reg from the guest reg.
4951 */
4952 /** @todo would be nice to know if preserving the register is in any way helpful. */
4953 /* If the purpose is calculations, try duplicating the register value as
4954 we'll be clobbering the shadow. */
4955 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4956 && ( ~pReNative->Core.bmHstSimdRegs
4957 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4958 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
4959 {
4960 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
4961
4962 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
4963
4964 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
4965 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
4966 g_apszIemNativeHstSimdRegNames[idxRegNew]));
4967 idxSimdReg = idxRegNew;
4968 }
4969 /* If the current register matches the restrictions, go ahead and allocate
4970 it for the caller. */
4971 else if (fRegMask & RT_BIT_32(idxSimdReg))
4972 {
4973 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
4974 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
4975 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4976 {
4977 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4978 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
4979 else
4980 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
4981 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
4982 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4983 }
4984 else
4985 {
4986 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
4987 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
4988 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
4989 }
4990 }
4991 /* Otherwise, allocate a register that satisfies the caller and transfer
4992 the shadowing if compatible with the intended use. (This basically
4993 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4994 else
4995 {
4996 Assert(fNoVolatileRegs);
4997 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
4998 !fNoVolatileRegs
4999 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5000 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5001 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5002 {
5003 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5004 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
5005 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5006 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5007 }
5008 else
5009 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5010 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5011 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5012 idxSimdReg = idxRegNew;
5013 }
5014 }
5015 else
5016 {
5017 /*
5018 * Oops. Shadowed guest register already allocated!
5019 *
5020 * Allocate a new register, copy the value and, if updating, transfer the
5021 * guest shadow copy assignment to the new register.
5022 */
5023 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5024 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5025 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5026 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5027
5028 /** @todo share register for readonly access. */
5029 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5030 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5031
5032 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5033 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5034 else
5035 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5036
5037 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5038 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5039 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5040 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5041 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5042 else
5043 {
5044 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5045 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5046 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5047 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5048 }
5049 idxSimdReg = idxRegNew;
5050 }
5051 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5052
5053#ifdef VBOX_STRICT
5054 /* Strict builds: Check that the value is correct. */
5055 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5056 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5057#endif
5058
5059 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5060 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5061 {
5062# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5063 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5064 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5065# endif
5066
5067 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5068 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5069 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5070 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5071 else
5072 {
5073 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5074 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5075 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5076 }
5077 }
5078
5079 return idxSimdReg;
5080 }
5081
5082 /*
5083 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5084 */
5085 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5086
5087 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5088 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5089 else
5090 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5091
5092 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5093 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5094
5095 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5096 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5097 {
5098# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5099 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5100 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5101# endif
5102
5103 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5104 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5105 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5106 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5107 else
5108 {
5109 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5110 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5111 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5112 }
5113 }
5114
5115 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5116 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5117
5118 return idxRegNew;
5119}
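/*
 * Illustrative sketch (assumed usage, not compiled): an instruction emitter
 * updating the low 128 bits of a guest SIMD register (xmm2 here) would use
 * the allocator roughly like this.
 */
#if 0 /* illustration only */
    uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(2),
                                                                       kIemNativeGstSimdRegLdStSz_Low128,
                                                                       kIemNativeGstRegUse_ForUpdate);
    /* ... emit the actual SIMD operation on idxSimdReg ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
#endif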
5120
5121
5122/**
5123 * Flushes guest SIMD register shadow copies held by a set of host registers.
5124 *
5125 * This is used when calling an external helper to ensure that we don't carry on
5126 * with any guest shadows in volatile registers, as these will get corrupted by the call.
5127 *
5128 * @param pReNative The native recompile state.
5129 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5130 */
5131DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5132{
5133 /*
5134 * Reduce the mask by what's currently shadowed.
5135 */
5136 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5137 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5138 if (fHstSimdRegs)
5139 {
5140 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5141 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5142 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5143 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5144 if (bmHstSimdRegsWithGstShadowNew)
5145 {
5146 /*
5147 * Partial (likely).
5148 */
5149 uint64_t fGstShadows = 0;
5150 do
5151 {
5152 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5153 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5154 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5155 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5156 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5157 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5158
5159 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5160 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5161 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5162 } while (fHstSimdRegs != 0);
5163 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5164 }
5165 else
5166 {
5167 /*
5168 * Clear all.
5169 */
5170 do
5171 {
5172 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5173 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5174 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5175 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5176 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5177 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5178
5179 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5180 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5181 } while (fHstSimdRegs != 0);
5182 pReNative->Core.bmGstSimdRegShadows = 0;
5183 }
5184 }
5185}
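/*
 * Illustrative sketch (assumed usage, not compiled): the SIMD counterpart of
 * the GPR flush above, invoked with the call-volatile SIMD register mask
 * before emitting an external helper call.
 */
#if 0 /* illustration only */
    iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
#endif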
5186#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5187
5188
5189
5190/*********************************************************************************************************************************
5191* Code emitters for flushing pending guest register writes and sanity checks *
5192*********************************************************************************************************************************/
5193
5194#ifdef VBOX_STRICT
5195/**
5196 * Does internal register allocator sanity checks.
5197 */
5198DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5199{
5200 /*
5201 * Iterate host registers building a guest shadowing set.
5202 */
5203 uint64_t bmGstRegShadows = 0;
5204 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5205 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5206 while (bmHstRegsWithGstShadow)
5207 {
5208 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5209 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5210 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5211
5212 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5213 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5214 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5215 bmGstRegShadows |= fThisGstRegShadows;
5216 while (fThisGstRegShadows)
5217 {
5218 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5219 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5220 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5221 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5222 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5223 }
5224 }
5225 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5226 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5227 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5228
5229 /*
5230 * Now the other way around, checking the guest to host index array.
5231 */
5232 bmHstRegsWithGstShadow = 0;
5233 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5234 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5235 while (bmGstRegShadows)
5236 {
5237 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5238 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5239 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5240
5241 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5242 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5243 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5244 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5245 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5246 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5247 }
5248 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5249 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5250 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5251}
5252#endif /* VBOX_STRICT */
5253
5254
5255/**
5256 * Flushes any delayed guest register writes.
5257 *
5258 * This must be called prior to calling CImpl functions and any helpers that use
5259 * the guest state (like raising exceptions) and such.
5260 *
5261 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5262 * the caller if it wishes to do so.
5263 */
5264DECL_HIDDEN_THROW(uint32_t)
5265iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5266{
5267#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5268 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5269 off = iemNativeEmitPcWriteback(pReNative, off);
5270#else
5271 RT_NOREF(pReNative, fGstShwExcept);
5272#endif
5273
5274#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5275 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5276#endif
5277
5278#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5279 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5280#endif
5281
5282 return off;
5283}
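/*
 * Illustrative sketch (assumed usage, not compiled): before emitting a call
 * to a CImpl function, all delayed writes would be flushed with no exceptions
 * in either mask.
 */
#if 0 /* illustration only */
    off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0 /*fGstShwExcept*/, 0 /*fGstSimdShwExcept*/);
#endif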
5284
5285
5286#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5287/**
5288 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5289 */
5290DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5291{
5292 Assert(pReNative->Core.offPc);
5293# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5294 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5295 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5296# endif
5297
5298# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5299 /* Allocate a temporary PC register. */
5300 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5301
5302 /* Perform the addition and store the result. */
5303 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5304 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5305
5306 /* Free but don't flush the PC register. */
5307 iemNativeRegFreeTmp(pReNative, idxPcReg);
5308# else
5309 /* Compare the shadow with the context value, they should match. */
5310 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5311 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5312# endif
5313
5314 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5315 pReNative->Core.offPc = 0;
5316 pReNative->Core.cInstrPcUpdateSkipped = 0;
5317
5318 return off;
5319}
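/*
 * Illustrative sketch (assumed usage, not compiled): whenever the exact guest
 * RIP must be in CPUMCTX (e.g. before raising an exception), the accumulated
 * pReNative->Core.offPc would be written back along these lines.
 */
#if 0 /* illustration only */
    if (pReNative->Core.offPc)
        off = iemNativeEmitPcWritebackSlow(pReNative, off);
#endif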
5320#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5321
5322
5323/*********************************************************************************************************************************
5324* Code Emitters (larger snippets) *
5325*********************************************************************************************************************************/
5326
5327/**
5328 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5329 * extending to 64-bit width.
5330 *
5331 * @returns New code buffer offset on success, UINT32_MAX on failure.
5332 * @param pReNative The native recompile state.
5333 * @param off The current code buffer position.
5334 * @param idxHstReg The host register to load the guest register value into.
5335 * @param enmGstReg The guest register to load.
5336 *
5337 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5338 * that is something the caller needs to do if applicable.
5339 */
5340DECL_HIDDEN_THROW(uint32_t)
5341iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5342{
5343 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5344 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5345
5346 switch (g_aGstShadowInfo[enmGstReg].cb)
5347 {
5348 case sizeof(uint64_t):
5349 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5350 case sizeof(uint32_t):
5351 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5352 case sizeof(uint16_t):
5353 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5354#if 0 /* not present in the table. */
5355 case sizeof(uint8_t):
5356 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5357#endif
5358 default:
5359 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5360 }
5361}
5362
5363
5364#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5365/**
5366 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5367 *
5368 * @returns New code buffer offset on success, UINT32_MAX on failure.
5369 * @param pReNative The recompiler state.
5370 * @param off The current code buffer position.
5371 * @param idxHstSimdReg The host register to load the guest register value into.
5372 * @param enmGstSimdReg The guest register to load.
5373 * @param enmLoadSz The load size of the register.
5374 *
5375 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
5376 * that is something the caller needs to do if applicable.
5377 */
5378DECL_HIDDEN_THROW(uint32_t)
5379iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5380 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5381{
5382 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5383
5384 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5385 switch (enmLoadSz)
5386 {
5387 case kIemNativeGstSimdRegLdStSz_256:
5388 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5389 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5390 case kIemNativeGstSimdRegLdStSz_Low128:
5391 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5392 case kIemNativeGstSimdRegLdStSz_High128:
5393 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5394 default:
5395 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5396 }
5397}
5398#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5399
5400#ifdef VBOX_STRICT
5401
5402/**
5403 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5404 *
5405 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5406 * Trashes EFLAGS on AMD64.
5407 */
5408DECL_HIDDEN_THROW(uint32_t)
5409iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5410{
5411# ifdef RT_ARCH_AMD64
5412 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5413
5414 /* rol reg64, 32 */
5415 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5416 pbCodeBuf[off++] = 0xc1;
5417 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5418 pbCodeBuf[off++] = 32;
5419
5420 /* test reg32, ffffffffh */
5421 if (idxReg >= 8)
5422 pbCodeBuf[off++] = X86_OP_REX_B;
5423 pbCodeBuf[off++] = 0xf7;
5424 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5425 pbCodeBuf[off++] = 0xff;
5426 pbCodeBuf[off++] = 0xff;
5427 pbCodeBuf[off++] = 0xff;
5428 pbCodeBuf[off++] = 0xff;
5429
5430 /* je/jz +1 */
5431 pbCodeBuf[off++] = 0x74;
5432 pbCodeBuf[off++] = 0x01;
5433
5434 /* int3 */
5435 pbCodeBuf[off++] = 0xcc;
5436
5437 /* rol reg64, 32 */
5438 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5439 pbCodeBuf[off++] = 0xc1;
5440 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5441 pbCodeBuf[off++] = 32;
5442
5443# elif defined(RT_ARCH_ARM64)
5444 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5445 /* lsr tmp0, reg64, #32 */
5446 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5447 /* cbz tmp0, +1 */
5448 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5449 /* brk #0x1100 */
5450 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5451
5452# else
5453# error "Port me!"
5454# endif
5455 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5456 return off;
5457}
5458
5459
5460/**
5461 * Emits code that checks that the content of register @a idxReg is the same
5462 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5463 * instruction if that's not the case.
5464 *
5465 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5466 * Trashes EFLAGS on AMD64.
5467 */
5468DECL_HIDDEN_THROW(uint32_t)
5469iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5470{
5471#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5472 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5473 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5474 return off;
5475#endif
5476
5477# ifdef RT_ARCH_AMD64
5478 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5479
5480 /* cmp reg, [mem] */
5481 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5482 {
5483 if (idxReg >= 8)
5484 pbCodeBuf[off++] = X86_OP_REX_R;
5485 pbCodeBuf[off++] = 0x38;
5486 }
5487 else
5488 {
5489 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5490 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5491 else
5492 {
5493 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5494 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5495 else
5496 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5497 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5498 if (idxReg >= 8)
5499 pbCodeBuf[off++] = X86_OP_REX_R;
5500 }
5501 pbCodeBuf[off++] = 0x39;
5502 }
5503 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5504
5505 /* je/jz +1 */
5506 pbCodeBuf[off++] = 0x74;
5507 pbCodeBuf[off++] = 0x01;
5508
5509 /* int3 */
5510 pbCodeBuf[off++] = 0xcc;
5511
5512 /* For values smaller than the register size, we must check that the rest
5513 of the register is all zeros. */
5514 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5515 {
5516 /* test reg64, imm32 */
5517 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5518 pbCodeBuf[off++] = 0xf7;
5519 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5520 pbCodeBuf[off++] = 0;
5521 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5522 pbCodeBuf[off++] = 0xff;
5523 pbCodeBuf[off++] = 0xff;
5524
5525 /* je/jz +1 */
5526 pbCodeBuf[off++] = 0x74;
5527 pbCodeBuf[off++] = 0x01;
5528
5529 /* int3 */
5530 pbCodeBuf[off++] = 0xcc;
5531 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5532 }
5533 else
5534 {
5535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5536 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5537 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5538 }
5539
5540# elif defined(RT_ARCH_ARM64)
5541 /* mov TMP0, [gstreg] */
5542 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5543
5544 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5545 /* sub tmp0, tmp0, idxReg */
5546 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5547 /* cbz tmp0, +1 */
5548 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5549 /* brk #0x1000+enmGstReg */
5550 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5552
5553# else
5554# error "Port me!"
5555# endif
5556 return off;
5557}
5558
5559
5560# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5561# ifdef RT_ARCH_AMD64
5562/**
5563 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
5564 */
5565DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5566{
5567 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5568 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5569 if (idxSimdReg >= 8)
5570 pbCodeBuf[off++] = X86_OP_REX_R;
5571 pbCodeBuf[off++] = 0x0f;
5572 pbCodeBuf[off++] = 0x38;
5573 pbCodeBuf[off++] = 0x29;
5574 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5575
5576 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5577 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5578 pbCodeBuf[off++] = X86_OP_REX_W
5579 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5580 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5581 pbCodeBuf[off++] = 0x0f;
5582 pbCodeBuf[off++] = 0x3a;
5583 pbCodeBuf[off++] = 0x16;
5584 pbCodeBuf[off++] = 0xeb;
5585 pbCodeBuf[off++] = 0x00;
5586
5587 /* cmp tmp0, 0xffffffffffffffff. */
5588 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5589 pbCodeBuf[off++] = 0x83;
5590 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5591 pbCodeBuf[off++] = 0xff;
5592
5593 /* je/jz +1 */
5594 pbCodeBuf[off++] = 0x74;
5595 pbCodeBuf[off++] = 0x01;
5596
5597 /* int3 */
5598 pbCodeBuf[off++] = 0xcc;
5599
5600 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5601 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5602 pbCodeBuf[off++] = X86_OP_REX_W
5603 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5604 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5605 pbCodeBuf[off++] = 0x0f;
5606 pbCodeBuf[off++] = 0x3a;
5607 pbCodeBuf[off++] = 0x16;
5608 pbCodeBuf[off++] = 0xeb;
5609 pbCodeBuf[off++] = 0x01;
5610
5611 /* cmp tmp0, 0xffffffffffffffff. */
5612 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5613 pbCodeBuf[off++] = 0x83;
5614 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5615 pbCodeBuf[off++] = 0xff;
5616
5617 /* je/jz +1 */
5618 pbCodeBuf[off++] = 0x74;
5619 pbCodeBuf[off++] = 0x01;
5620
5621 /* int3 */
5622 pbCodeBuf[off++] = 0xcc;
5623
5624 return off;
5625}
5626# endif
5627
5628
5629/**
5630 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5631 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5632 * instruction if that's not the case.
5633 *
5634 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5635 * Trashes EFLAGS on AMD64.
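 *
 * On AMD64 this is roughly a pcmpeqq + pextrq compare of the low (and, for
 * 256-bit checks, the high) 128 bits against CPUMCTX with an int3 on
 * mismatch; on ARM64 an eor + uaddlv reduction with a brk on mismatch
 * (a summary of the code below).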
5636 */
5637DECL_HIDDEN_THROW(uint32_t)
5638iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5639 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5640{
5641 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5642 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5643 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5644 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5645 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5646 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5647 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5648 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5649 return off;
5650
5651# ifdef RT_ARCH_AMD64
5652 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5653 {
5654 /* movdqa vectmp0, idxSimdReg */
5655 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5656
5657 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5658
5659 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5660 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5661 }
5662
5663 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5664 {
5665 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5666 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5667
5668 /* vextracti128 vectmp0, idxSimdReg, 1 */
5669 pbCodeBuf[off++] = X86_OP_VEX3;
5670 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5671 | X86_OP_VEX3_BYTE1_X
5672 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5673 | 0x03; /* Opcode map */
5674 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5675 pbCodeBuf[off++] = 0x39;
5676 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5677 pbCodeBuf[off++] = 0x01;
5678
5679 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5680 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5681 }
5682# elif defined(RT_ARCH_ARM64)
5683 /* mov vectmp0, [gstreg] */
5684 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5685
5686 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5687 {
5688 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5689 /* eor vectmp0, vectmp0, idxSimdReg */
5690 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5691 /* uaddlv vectmp0, vectmp0.16B */
5692 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
5693 /* umov tmp0, vectmp0.H[0] */
5694 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5695 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5696 /* cbz tmp0, +1 */
5697 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5698 /* brk #0x1000+enmGstReg */
5699 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5700 }
5701
5702 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5703 {
5704 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5705 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
5706 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
5707 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
5708 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
5709 /* umov tmp0, (vectmp0 + 1).H[0] */
5710 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
5711 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5712 /* cbz tmp0, +1 */
5713 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5714 /* brk #0x1000+enmGstReg */
5715 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5716 }
5717
5718# else
5719# error "Port me!"
5720# endif
5721
5722 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5723 return off;
5724}
5725# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5726
5727
5728/**
5729 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5730 * important bits.
5731 *
5732 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5733 * Trashes EFLAGS on AMD64.
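 *
 * Roughly equivalent to this strict-build check (a sketch, not the exact
 * instruction sequence):
 * @code
 *      if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *          != (fExec & IEMTB_F_KEY_MASK))
 *          breakpoint;     // int3 / brk #0x2000
 * @endcode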
5734 */
5735DECL_HIDDEN_THROW(uint32_t)
5736iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5737{
5738 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5739 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5740 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5741 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5742
5743#ifdef RT_ARCH_AMD64
5744 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5745
5746 /* je/jz +1 */
5747 pbCodeBuf[off++] = 0x74;
5748 pbCodeBuf[off++] = 0x01;
5749
5750 /* int3 */
5751 pbCodeBuf[off++] = 0xcc;
5752
5753# elif defined(RT_ARCH_ARM64)
5754 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5755
5756 /* b.eq +1 */
5757 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5758 /* brk #0x2000 */
5759 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5760
5761# else
5762# error "Port me!"
5763# endif
5764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5765
5766 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5767 return off;
5768}
5769
5770#endif /* VBOX_STRICT */
5771
5772
5773#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5774/**
5775 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
5776 */
5777DECL_HIDDEN_THROW(uint32_t)
5778iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
5779{
5780 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
5781
5782 fEflNeeded &= X86_EFL_STATUS_BITS;
5783 if (fEflNeeded)
5784 {
5785# ifdef RT_ARCH_AMD64
5786 /* test dword [pVCpu + offVCpu], imm32 */
5787 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5788 if (fEflNeeded <= 0xff)
5789 {
5790 pCodeBuf[off++] = 0xf6;
5791 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5792 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5793 }
5794 else
5795 {
5796 pCodeBuf[off++] = 0xf7;
5797 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5798 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5799 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
5800 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
5801 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
5802 }
5803 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5804
5805# else
5806 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5807 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
5808 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
5809# ifdef RT_ARCH_ARM64
5810 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
5811 off = iemNativeEmitBrk(pReNative, off, 0x7777);
5812# else
5813# error "Port me!"
5814# endif
5815 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5816# endif
5817 }
5818 return off;
5819}
5820#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
5821
5822
5823/**
5824 * Emits code for checking the return code of a call and rcPassUp, returning
5825 * from the code if either is non-zero.
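 *
 * The emitted code roughly amounts to the following (a sketch; rcCall stands
 * for the status code returned by the preceding call, host specifics differ):
 * @code
 *      if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 * @endcode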
5826 */
5827DECL_HIDDEN_THROW(uint32_t)
5828iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5829{
5830#ifdef RT_ARCH_AMD64
5831 /*
5832 * AMD64: eax = call status code.
5833 */
5834
5835 /* edx = rcPassUp */
5836 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5837# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5838 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5839# endif
5840
5841 /* edx = eax | rcPassUp */
5842 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5843 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5845 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5846
5847 /* Jump to non-zero status return path. */
5848 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5849
5850 /* done. */
5851
5852#elif RT_ARCH_ARM64
5853 /*
5854 * ARM64: w0 = call status code.
5855 */
5856# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5857 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5858# endif
5859 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5860
5861 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5862
5863 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5864
5865 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5866 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5867 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5868
5869#else
5870# error "port me"
5871#endif
5872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5873 RT_NOREF_PV(idxInstr);
5874 return off;
5875}
5876
5877
5878/**
5879 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5880 * raising a \#GP(0) if it isn't.
5881 *
5882 * @returns New code buffer offset, UINT32_MAX on failure.
5883 * @param pReNative The native recompile state.
5884 * @param off The code buffer offset.
5885 * @param idxAddrReg The host register with the address to check.
5886 * @param idxInstr The current instruction.
5887 */
5888DECL_HIDDEN_THROW(uint32_t)
5889iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5890{
5891 /*
5892 * Make sure we don't have any outstanding guest register writes as we may
5893 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5894 */
5895 off = iemNativeRegFlushPendingWrites(pReNative, off);
5896
5897#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5898 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5899#else
5900 RT_NOREF(idxInstr);
5901#endif
5902
5903#ifdef RT_ARCH_AMD64
5904 /*
5905 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5906 * return raisexcpt();
5907 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5908 */
5909 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5910
5911 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5912 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5913 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5914 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5915 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5916
5917 iemNativeRegFreeTmp(pReNative, iTmpReg);
5918
5919#elif defined(RT_ARCH_ARM64)
5920 /*
5921 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5922 * return raisexcpt();
5923 * ----
5924 * mov x1, 0x800000000000
5925 * add x1, x0, x1
5926 * cmp xzr, x1, lsr 48
5927 * b.ne .Lraisexcpt
5928 */
5929 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5930
5931 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5932 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5933 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5934 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5935
5936 iemNativeRegFreeTmp(pReNative, iTmpReg);
5937
5938#else
5939# error "Port me"
5940#endif
5941 return off;
5942}
5943
5944
5945/**
5946 * Emits code to check that the content of @a idxAddrReg is within the limit
5947 * of CS, raising a \#GP(0) if it isn't.
5948 *
5949 * @returns New code buffer offset; throws VBox status code on error.
5950 * @param pReNative The native recompile state.
5951 * @param off The code buffer offset.
5952 * @param idxAddrReg The host register (32-bit) with the address to
5953 * check.
5954 * @param idxInstr The current instruction.
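 *
 * The emitted check is roughly (a sketch; CS.limit stands in for the CPUMCTX
 * CS segment limit that the shadow register gets loaded from):
 * @code
 *      if (idxAddrReg > CS.limit)      // unsigned compare (ja)
 *          goto RaiseGp0;
 * @endcode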
5955 */
5956DECL_HIDDEN_THROW(uint32_t)
5957iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5958 uint8_t idxAddrReg, uint8_t idxInstr)
5959{
5960 /*
5961 * Make sure we don't have any outstanding guest register writes as we may
5962 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5963 */
5964 off = iemNativeRegFlushPendingWrites(pReNative, off);
5965
5966#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5967 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5968#else
5969 RT_NOREF(idxInstr);
5970#endif
5971
5972 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5973 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5974 kIemNativeGstRegUse_ReadOnly);
5975
5976 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5977 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5978
5979 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5980 return off;
5981}
5982
5983
5984/**
5985 * Emits a call to a CImpl function or something similar.
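 *
 * The generated code roughly amounts to (a sketch; the exact argument
 * registers and the hidden VBOXSTRICTRC handling depend on the host ABI):
 * @code
 *      // flush pending writes and the requested guest register shadows
 *      rcStrict = pfnCImpl(pVCpu, cbInstr [, uParam0 [, uParam1 [, uParam2]]]);
 *      if (rcStrict != VINF_SUCCESS || pVCpu->iem.s.rcPassUp != VINF_SUCCESS)
 *          goto NonZeroRetOrPassUp;
 * @endcode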
5986 */
5987DECL_HIDDEN_THROW(uint32_t)
5988iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5989 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5990{
5991 /* Writeback everything. */
5992 off = iemNativeRegFlushPendingWrites(pReNative, off);
5993
5994 /*
5995 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5996 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5997 */
5998 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5999 fGstShwFlush
6000 | RT_BIT_64(kIemNativeGstReg_Pc)
6001 | RT_BIT_64(kIemNativeGstReg_EFlags));
6002 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6003
6004 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6005
6006 /*
6007 * Load the parameters.
6008 */
6009#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6010 /* Special code for the hidden VBOXSTRICTRC pointer. */
6011 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6012 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6013 if (cAddParams > 0)
6014 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6015 if (cAddParams > 1)
6016 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6017 if (cAddParams > 2)
6018 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6019 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6020
6021#else
6022 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6023 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6024 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6025 if (cAddParams > 0)
6026 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6027 if (cAddParams > 1)
6028 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6029 if (cAddParams > 2)
6030# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6031 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6032# else
6033 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6034# endif
6035#endif
6036
6037 /*
6038 * Make the call.
6039 */
6040 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6041
6042#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6043 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6044#endif
6045
6046 /*
6047 * Check the status code.
6048 */
6049 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6050}
6051
6052
6053/**
6054 * Emits a call to a threaded worker function.
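 *
 * Roughly (a sketch; the parameter registers and the Windows VBOXSTRICTRC
 * quirk depend on the host):
 * @code
 *      rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
 *                      pCallEntry->auParams[0], auParams[1], auParams[2]);
 *      if (rcStrict != VINF_SUCCESS || pVCpu->iem.s.rcPassUp != VINF_SUCCESS)
 *          goto NonZeroRetOrPassUp;
 * @endcode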
6055 */
6056DECL_HIDDEN_THROW(uint32_t)
6057iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6058{
6059 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6060
6061 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6062 off = iemNativeRegFlushPendingWrites(pReNative, off);
6063
6064 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6065 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6066
6067#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6068 /* The threaded function may throw / long jmp, so set current instruction
6069 number if we're counting. */
6070 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6071#endif
6072
6073 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6074
6075#ifdef RT_ARCH_AMD64
6076 /* Load the parameters and emit the call. */
6077# ifdef RT_OS_WINDOWS
6078# ifndef VBOXSTRICTRC_STRICT_ENABLED
6079 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6080 if (cParams > 0)
6081 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6082 if (cParams > 1)
6083 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6084 if (cParams > 2)
6085 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6086# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6087 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6088 if (cParams > 0)
6089 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6090 if (cParams > 1)
6091 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6092 if (cParams > 2)
6093 {
6094 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6095 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6096 }
6097 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6098# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6099# else
6100 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6101 if (cParams > 0)
6102 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6103 if (cParams > 1)
6104 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6105 if (cParams > 2)
6106 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6107# endif
6108
6109 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6110
6111# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6112 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6113# endif
6114
6115#elif RT_ARCH_ARM64
6116 /*
6117 * ARM64:
6118 */
6119 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6120 if (cParams > 0)
6121 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6122 if (cParams > 1)
6123 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6124 if (cParams > 2)
6125 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6126
6127 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6128
6129#else
6130# error "port me"
6131#endif
6132
6133 /*
6134 * Check the status code.
6135 */
6136 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6137
6138 return off;
6139}
6140
6141#ifdef VBOX_WITH_STATISTICS
6142/**
6143 * Emits code to update the thread call statistics.
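 *
 * The emitted code simply does a 32-bit increment of
 * pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] (see below).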
6144 */
6145DECL_INLINE_THROW(uint32_t)
6146iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6147{
6148 /*
6149 * Update threaded function stats.
6150 */
6151 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6152 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6153# if defined(RT_ARCH_ARM64)
6154 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6155 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6156 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6157 iemNativeRegFreeTmp(pReNative, idxTmp1);
6158 iemNativeRegFreeTmp(pReNative, idxTmp2);
6159# else
6160 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6161# endif
6162 return off;
6163}
6164#endif /* VBOX_WITH_STATISTICS */
6165
6166
6167/**
6168 * Emits the code at the ReturnWithFlags label (returns
6169 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6170 */
6171static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6172{
6173 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6174 if (idxLabel != UINT32_MAX)
6175 {
6176 iemNativeLabelDefine(pReNative, idxLabel, off);
6177
6178 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6179
6180 /* jump back to the return sequence. */
6181 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6182 }
6183 return off;
6184}
6185
6186
6187/**
6188 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6189 */
6190static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6191{
6192 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6193 if (idxLabel != UINT32_MAX)
6194 {
6195 iemNativeLabelDefine(pReNative, idxLabel, off);
6196
6197 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6198
6199 /* jump back to the return sequence. */
6200 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6201 }
6202 return off;
6203}
6204
6205
6206/**
6207 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
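 *
 * This defines the NonZeroRetOrPassUp label, calls
 * iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr) and then jumps to
 * the common return sequence (a summary of the code below).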
6208 */
6209static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6210{
6211 /*
6212 * Generate the rc + rcPassUp fiddling code if needed.
6213 */
6214 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6215 if (idxLabel != UINT32_MAX)
6216 {
6217 iemNativeLabelDefine(pReNative, idxLabel, off);
6218
6219 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6220#ifdef RT_ARCH_AMD64
6221# ifdef RT_OS_WINDOWS
6222# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6224# endif
6225 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6226 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6227# else
6228 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6229 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6230# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6231 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6232# endif
6233# endif
6234# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6235 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6236# endif
6237
6238#else
6239 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6240 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6241 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6242#endif
6243
6244 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6245 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6246 }
6247 return off;
6248}
6249
6250
6251/**
6252 * Emits a standard epilog.
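 *
 * The epilog zeroes the return register for the success path, defines the
 * common return label, restores the non-volatile host registers and returns,
 * and finally appends the RC fiddling code (a summary of the code below).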
6253 */
6254static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6255{
6256 *pidxReturnLabel = UINT32_MAX;
6257
6258 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6259 off = iemNativeRegFlushPendingWrites(pReNative, off);
6260
6261 /*
6262 * Successful return, so clear the return register (eax, w0).
6263 */
6264 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
6265
6266 /*
6267 * Define label for common return point.
6268 */
6269 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6270 *pidxReturnLabel = idxReturn;
6271
6272 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6273
6274 /*
6275 * Restore registers and return.
6276 */
6277#ifdef RT_ARCH_AMD64
6278 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6279
6280 /* Reposition rsp at the r15 restore point. */
6281 pbCodeBuf[off++] = X86_OP_REX_W;
6282 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6283 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6284 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6285
6286 /* Pop non-volatile registers and return */
6287 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6288 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6289 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6290 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6291 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6292 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6293 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6294 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6295# ifdef RT_OS_WINDOWS
6296 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6297 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6298# endif
6299 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6300 pbCodeBuf[off++] = 0xc9; /* leave */
6301 pbCodeBuf[off++] = 0xc3; /* ret */
6302 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6303
6304#elif RT_ARCH_ARM64
6305 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6306
6307 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6308 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6309 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6310 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6311 IEMNATIVE_FRAME_VAR_SIZE / 8);
6312 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6313 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6314 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6315 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6316 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6317 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6318 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6319 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6320 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6321 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6322 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6323 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6324
6325 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6326 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6327 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6328 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6329
6330 /* retab / ret */
6331# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6332 if (1)
6333 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6334 else
6335# endif
6336 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6337
6338#else
6339# error "port me"
6340#endif
6341 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6342
6343 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6344}
6345
6346
6347/**
6348 * Emits a standard prolog.
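 *
 * This saves all non-volatile host registers, establishes the frame pointer,
 * loads pVCpu (and on ARM64 also pCpumCtx) into the fixed registers and
 * reserves the variable and stack argument area (a summary of the code below).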
6349 */
6350static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6351{
6352#ifdef RT_ARCH_AMD64
6353 /*
6354 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6355 * reserving 64 bytes for stack variables plus 4 non-register argument
6356 * slots. Fixed register assignment: xBX = pVCpu;
6357 *
6358 * Since we always do the same register spilling, we can use the same
6359 * unwind description for all the code.
6360 */
6361 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6362 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6363 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6364 pbCodeBuf[off++] = 0x8b;
6365 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6366 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6367 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6368# ifdef RT_OS_WINDOWS
6369 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6370 pbCodeBuf[off++] = 0x8b;
6371 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6372 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6373 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6374# else
6375 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6376 pbCodeBuf[off++] = 0x8b;
6377 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6378# endif
6379 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6380 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6381 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6382 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6383 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6384 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6385 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6386 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6387
6388# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6389 /* Save the frame pointer. */
6390 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6391# endif
6392
6393 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6394 X86_GREG_xSP,
6395 IEMNATIVE_FRAME_ALIGN_SIZE
6396 + IEMNATIVE_FRAME_VAR_SIZE
6397 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6398 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6399 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6400 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6401 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6402
6403#elif RT_ARCH_ARM64
6404 /*
6405 * We set up a stack frame exactly like on x86, only we have to push the
6406 * return address ourselves here. We save all non-volatile registers.
6407 */
6408 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6409
6410# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been
6411 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6412 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
6413 * in any way conditional, so we just emit this instruction now and hope for the best... */
6414 /* pacibsp */
6415 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6416# endif
6417
6418 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6419 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6420 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6421 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6422 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6423 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6424 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6425 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6426 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6427 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6428 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6429 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6430 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6431 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6432 /* Save the BP and LR (ret address) registers at the top of the frame. */
6433 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6434 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6435 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6436 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6437 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6438 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6439
6440 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6441 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6442
6443 /* mov r28, r0 */
6444 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6445 /* mov r27, r1 */
6446 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6447
6448# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6449 /* Save the frame pointer. */
6450 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6451 ARMV8_A64_REG_X2);
6452# endif
6453
6454#else
6455# error "port me"
6456#endif
6457 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6458 return off;
6459}
6460
6461
6462/*********************************************************************************************************************************
6463* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6464*********************************************************************************************************************************/
6465
6466/**
6467 * Internal work that allocates a variable with kind set to
6468 * kIemNativeVarKind_Invalid and no current stack allocation.
6469 *
6470 * The kind will either be set by the caller or later when the variable is first
6471 * assigned a value.
6472 *
6473 * @returns Unpacked index.
6474 * @internal
6475 */
6476static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6477{
6478 Assert(cbType > 0 && cbType <= 64);
6479 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6480 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6481 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6482 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6483 pReNative->Core.aVars[idxVar].cbVar = cbType;
6484 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6485 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6486 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6487 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6488 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6489 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6490 pReNative->Core.aVars[idxVar].u.uValue = 0;
6491#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6492 pReNative->Core.aVars[idxVar].fSimdReg = false;
6493#endif
6494 return idxVar;
6495}
6496
6497
6498/**
6499 * Internal work that allocates an argument variable w/o setting enmKind.
6500 *
6501 * @returns Unpacked index.
6502 * @internal
6503 */
6504static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6505{
6506 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6507 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6508 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6509
6510 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6511 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6512 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6513 return idxVar;
6514}
6515
6516
6517/**
6518 * Gets the stack slot for a stack variable, allocating one if necessary.
6519 *
6520 * Calling this function implies that the stack slot will contain a valid
6521 * variable value. The caller deals with any register currently assigned to the
6522 * variable, typically by spilling it into the stack slot.
6523 *
6524 * @returns The stack slot number.
6525 * @param pReNative The recompiler state.
6526 * @param idxVar The variable.
6527 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6528 */
6529DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6530{
6531 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6532 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6533 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6534
6535 /* Already got a slot? */
6536 uint8_t const idxStackSlot = pVar->idxStackSlot;
6537 if (idxStackSlot != UINT8_MAX)
6538 {
6539 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6540 return idxStackSlot;
6541 }
6542
6543 /*
6544 * A single slot is easy to allocate.
6545 * Allocate them from the top end, closest to BP, to reduce the displacement.
6546 */
6547 if (pVar->cbVar <= sizeof(uint64_t))
6548 {
6549 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6550 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6551 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6552 pVar->idxStackSlot = (uint8_t)iSlot;
6553 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6554 return (uint8_t)iSlot;
6555 }
6556
6557 /*
6558 * We need more than one stack slot.
6559 *
6560 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
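 * cbVar -> fBitAllocMask: 16 -> 0x3; 32 -> 0xf; 64 -> 0xff;
 * (a worked example: one bit per 8-byte stack slot, derived from the two
 * mask expressions right below)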
6561 */
6562 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6563 Assert(pVar->cbVar <= 64);
6564 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6565 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6566 uint32_t bmStack = pReNative->Core.bmStack;
6567 while (bmStack != UINT32_MAX)
6568 {
6569 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6570 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6571 iSlot = (iSlot - 1) & ~fBitAlignMask;
6572 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6573 {
6574 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6575 pVar->idxStackSlot = (uint8_t)iSlot;
6576 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6577 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6578 return (uint8_t)iSlot;
6579 }
6580
6581 bmStack |= (fBitAllocMask << iSlot);
6582 }
6583 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6584}
6585
6586
6587/**
6588 * Changes the variable to a stack variable.
6589 *
6590 * Currently this is only possible to do the first time the variable is used;
6591 * switching later can be implemented but hasn't been done.
6592 *
6593 * @param pReNative The recompiler state.
6594 * @param idxVar The variable.
6595 * @throws VERR_IEM_VAR_IPE_2
6596 */
6597DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6598{
6599 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6600 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6601 if (pVar->enmKind != kIemNativeVarKind_Stack)
6602 {
6603 /* We could in theory transition from immediate to stack as well, but it
6604 would involve the caller doing work storing the value on the stack. So,
6605 till that's required we only allow transition from invalid. */
6606 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6607 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6608 pVar->enmKind = kIemNativeVarKind_Stack;
6609
6610 /* Note! We don't allocate a stack slot here, that's only done when a
6611 slot is actually needed to hold a variable value. */
6612 }
6613}
6614
6615
6616/**
6617 * Sets the variable to a constant (immediate) value.
6618 *
6619 * This does not require stack storage as we know the value and can always
6620 * reload it, unless of course it's referenced.
6621 *
6622 * @param pReNative The recompiler state.
6623 * @param idxVar The variable.
6624 * @param uValue The immediate value.
6625 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6626 */
6627DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6628{
6629 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6630 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6631 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6632 {
6633 /* Only simple transitions for now. */
6634 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6635 pVar->enmKind = kIemNativeVarKind_Immediate;
6636 }
6637 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6638
6639 pVar->u.uValue = uValue;
6640 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
6641 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
6642 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
6643}
6644
6645
6646/**
6647 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6648 *
6649 * This does not require stack storage as we know the value and can always
6650 * reload it. Loading is postponed till needed.
6651 *
6652 * @param pReNative The recompiler state.
6653 * @param idxVar The variable. Unpacked.
6654 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
6655 *
6656 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6657 * @internal
6658 */
6659static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6660{
6661 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6662 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6663
6664 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6665 {
6666 /* Only simple transitions for now. */
6667 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6668 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6669 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6670 }
6671 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6672
6673 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
6674
6675 /* Update the other variable, ensure it's a stack variable. */
6676 /** @todo handle variables with const values... that'll go boom now. */
6677 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6678 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
6679}
6680
6681
6682/**
6683 * Sets the variable to a reference (pointer) to a guest register reference.
6684 *
6685 * This does not require stack storage as we know the value and can always
6686 * reload it. Loading is postponed till needed.
6687 *
6688 * @param pReNative The recompiler state.
6689 * @param idxVar The variable.
6690 * @param enmRegClass The class of guest registers to reference.
6691 * @param idxReg The register within @a enmRegClass to reference.
6692 *
6693 * @throws VERR_IEM_VAR_IPE_2
6694 */
6695DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6696 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6697{
6698 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6699 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6700
6701 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
6702 {
6703 /* Only simple transitions for now. */
6704 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6705 pVar->enmKind = kIemNativeVarKind_GstRegRef;
6706 }
6707 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6708
6709 pVar->u.GstRegRef.enmClass = enmRegClass;
6710 pVar->u.GstRegRef.idx = idxReg;
6711}
6712
6713
6714DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6715{
6716 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6717}
6718
6719
6720DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6721{
6722 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6723
6724 /* Since we're using a generic uint64_t value type, we must truncate it if
6725 the variable is smaller, otherwise we may end up with a too large value when
6726 scaling up an imm8 w/ sign-extension.
6727
6728 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6729 in the bios, bx=1) when running on arm, because clang expects 16-bit
6730 register parameters to have bits 16 and up set to zero. Instead of
6731 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6732 CF value in the result. */
6733 switch (cbType)
6734 {
6735 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6736 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6737 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6738 }
6739 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6740 return idxVar;
6741}
6742
6743
6744DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6745{
6746 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
6747 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
6748 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6749 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6750 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6751 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6752
6753 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6754 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
6755 return idxArgVar;
6756}
6757
6758
6759DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6760{
6761 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6762 /* Don't set to stack now, leave that to the first use as for instance
6763 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6764 return idxVar;
6765}
6766
6767
6768DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6769{
6770 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6771
6772 /* Since we're using a generic uint64_t value type, we must truncate it if
6773 the variable is smaller, otherwise we may end up with a too large value when
6774 scaling up an imm8 w/ sign-extension. */
6775 switch (cbType)
6776 {
6777 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6778 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6779 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6780 }
6781 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6782 return idxVar;
6783}
6784
6785
6786DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
6787{
6788 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6789 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6790
6791 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
6792 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
6793
6794 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
6795
6796 /* Truncate the value to this variable's size. */
6797 switch (cbType)
6798 {
6799 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
6800 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
6801 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
6802 }
6803
6804 iemNativeVarRegisterRelease(pReNative, idxVarOther);
6805 iemNativeVarRegisterRelease(pReNative, idxVar);
6806 return idxVar;
6807}
6808
6809
6810/**
6811 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6812 * fixed till we call iemNativeVarRegisterRelease.
6813 *
6814 * @returns The host register number.
6815 * @param pReNative The recompiler state.
6816 * @param idxVar The variable.
6817 * @param poff Pointer to the instruction buffer offset.
6818 * In case a register needs to be freed up or the value
6819 * loaded off the stack.
6820 * @param fInitialized Set if the variable must already have been initialized.
6821 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6822 * the case.
6823 * @param idxRegPref Preferred register number or UINT8_MAX.
6824 */
6825DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6826 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6827{
6828 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6829 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6830 Assert(pVar->cbVar <= 8);
6831 Assert(!pVar->fRegAcquired);
6832
6833 uint8_t idxReg = pVar->idxReg;
6834 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6835 {
6836 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6837 && pVar->enmKind < kIemNativeVarKind_End);
6838 pVar->fRegAcquired = true;
6839 return idxReg;
6840 }
6841
6842 /*
6843 * If the kind of variable has not yet been set, default to 'stack'.
6844 */
6845 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6846 && pVar->enmKind < kIemNativeVarKind_End);
6847 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6848 iemNativeVarSetKindToStack(pReNative, idxVar);
6849
6850 /*
6851 * We have to allocate a register for the variable, even if it's a stack one,
6852 * as we don't know if there are modifications being made to it before it's
6853 * finalized (todo: analyze and insert hints about that?).
6854 *
6855 * If we can, we try to get the correct register for argument variables. This
6856 * assumes that most argument variables are fetched as close as possible
6857 * to the actual call, so that there aren't any interfering hidden calls
6858 * (memory accesses, etc.) in between.
6859 *
6860 * If we cannot, or it's a regular variable, we make sure no argument registers
6861 * that will be used by this MC block are allocated here, and we always
6862 * prefer non-volatile registers to avoid needing to spill stuff for an
6863 * internal call.
6864 */
6865 /** @todo Detect too early argument value fetches and warn about hidden
6866 * calls causing less optimal code to be generated in the python script. */
6867
6868 uint8_t const uArgNo = pVar->uArgNo;
6869 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6870 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6871 {
6872 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6873
6874#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6875 /* Writeback any dirty shadow registers we are about to unshadow. */
6876 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
6877#endif
6878
6879 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6880 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6881 }
6882 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6883 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6884 {
6885 /** @todo there must be a better way for this and boot cArgsX? */
6886 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6887 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6888 & ~pReNative->Core.bmHstRegsWithGstShadow
6889 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6890 & fNotArgsMask;
6891 if (fRegs)
6892 {
6893 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6894 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6895 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6896 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6897 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6898 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6899 }
6900 else
6901 {
6902 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6903 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6904 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6905 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6906 }
6907 }
6908 else
6909 {
6910 idxReg = idxRegPref;
6911 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6912 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
6913 }
6914 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6915 pVar->idxReg = idxReg;
6916
6917#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6918 pVar->fSimdReg = false;
6919#endif
6920
6921 /*
6922 * Load it off the stack if we've got a stack slot.
6923 */
6924 uint8_t const idxStackSlot = pVar->idxStackSlot;
6925 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6926 {
6927 Assert(fInitialized);
6928 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6929 switch (pVar->cbVar)
6930 {
6931 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6932 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6933 case 3: AssertFailed(); RT_FALL_THRU();
6934 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6935 default: AssertFailed(); RT_FALL_THRU();
6936 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6937 }
6938 }
6939 else
6940 {
6941 Assert(idxStackSlot == UINT8_MAX);
6942 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6943 }
6944 pVar->fRegAcquired = true;
6945 return idxReg;
6946}
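/* Illustration (a minimal sketch, not part of the recompiler; the wrapper is
   hypothetical, the called functions are the ones defined in this file): the typical
   acquire/emit/release pattern described in the doc comment above, mirroring what
   iemNativeVarAllocAssign does. */
#if 0 /* illustration only */
static uint32_t iemNativeExampleCopyVar(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                        uint8_t idxVarDst, uint8_t idxVarSrc)
{
    /* Pin both variables to host registers; the source must already be initialized. */
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);

    /* Emit the copy while both registers are fixed. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegDst, idxRegSrc);

    /* Release so the allocator may spill or reuse the registers again. */
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}
#endif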
6947
6948
6949#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6950/**
6951 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
6952 * fixed till we call iemNativeVarRegisterRelease.
6953 *
6954 * @returns The host register number.
6955 * @param pReNative The recompiler state.
6956 * @param idxVar The variable.
6957 * @param poff Pointer to the instruction buffer offset.
6958 * In case a register needs to be freed up or the value
6959 * loaded off the stack.
6960 * @param fInitialized Set if the variable must already have been initialized.
6961 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6962 * the case.
6963 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
6964 */
6965DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6966 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6967{
6968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6969 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6970 Assert( pVar->cbVar == sizeof(RTUINT128U)
6971 || pVar->cbVar == sizeof(RTUINT256U));
6972 Assert(!pVar->fRegAcquired);
6973
6974 uint8_t idxReg = pVar->idxReg;
6975 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
6976 {
6977 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6978 && pVar->enmKind < kIemNativeVarKind_End);
6979 pVar->fRegAcquired = true;
6980 return idxReg;
6981 }
6982
6983 /*
6984 * If the kind of variable has not yet been set, default to 'stack'.
6985 */
6986 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6987 && pVar->enmKind < kIemNativeVarKind_End);
6988 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6989 iemNativeVarSetKindToStack(pReNative, idxVar);
6990
6991 /*
6992 * We have to allocate a register for the variable, even if it's a stack one,
6993 * as we don't know if there are modifications being made to it before it's
6994 * finalized (todo: analyze and insert hints about that?).
6995 *
6996 * If we can, we try to get the correct register for argument variables. This
6997 * assumes that most argument variables are fetched as close as possible
6998 * to the actual call, so that there aren't any interfering hidden calls
6999 * (memory accesses, etc.) in between.
7000 *
7001 * If we cannot, or it's a regular variable, we make sure no argument registers
7002 * that will be used by this MC block are allocated here, and we always
7003 * prefer non-volatile registers to avoid needing to spill stuff for an
7004 * internal call.
7005 */
7006 /** @todo Detect too early argument value fetches and warn about hidden
7007 * calls causing less optimal code to be generated in the python script. */
7008
7009 uint8_t const uArgNo = pVar->uArgNo;
7010 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7011
7012 /* SIMD is a bit simpler for now because there is no support for arguments. */
7013 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7014 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7015 {
7016 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7017 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7018 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7019 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7020 & fNotArgsMask;
7021 if (fRegs)
7022 {
7023 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7024 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7025 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7026 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7027 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7028 }
7029 else
7030 {
7031 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7032 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7033 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7034 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7035 }
7036 }
7037 else
7038 {
7039 idxReg = idxRegPref;
7040 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7041 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7042 }
7043 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7044
7045 pVar->fSimdReg = true;
7046 pVar->idxReg = idxReg;
7047
7048 /*
7049 * Load it off the stack if we've got a stack slot.
7050 */
7051 uint8_t const idxStackSlot = pVar->idxStackSlot;
7052 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7053 {
7054 Assert(fInitialized);
7055 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7056 switch (pVar->cbVar)
7057 {
7058 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7059 default: AssertFailed(); RT_FALL_THRU();
7060 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7061 }
7062 }
7063 else
7064 {
7065 Assert(idxStackSlot == UINT8_MAX);
7066 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7067 }
7068 pVar->fRegAcquired = true;
7069 return idxReg;
7070}
7071#endif
7072
7073
7074/**
7075 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7076 * guest register.
7077 *
7078 * This function makes sure there is a register for it and sets it to be the
7079 * current shadow copy of @a enmGstReg.
7080 *
7081 * @returns The host register number.
7082 * @param pReNative The recompiler state.
7083 * @param idxVar The variable.
7084 * @param enmGstReg The guest register this variable will be written to
7085 * after this call.
7086 * @param poff Pointer to the instruction buffer offset.
7087 * In case a register needs to be freed up or if the
7088 * variable content needs to be loaded off the stack.
7089 *
7090 * @note We DO NOT expect @a idxVar to be an argument variable,
7091 * because this function can only be used in the commit stage of
7092 * an instruction.
7093 */
7094DECL_HIDDEN_THROW(uint8_t)
7095iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7096{
7097 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7098 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7099 Assert(!pVar->fRegAcquired);
7100 AssertMsgStmt( pVar->cbVar <= 8
7101 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7102 || pVar->enmKind == kIemNativeVarKind_Stack),
7103 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7104 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7105 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7106
7107 /*
7108 * This shouldn't ever be used for arguments, unless it's in a weird else
7109 * branch that doesn't do any calling, and even then it's questionable.
7110 *
7111 * However, in case someone writes crazy wrong MC code and does register
7112 * updates before making calls, just use the regular register allocator to
7113 * ensure we get a register suitable for the intended argument number.
7114 */
7115 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7116
7117 /*
7118 * If there is already a register for the variable, we transfer/set the
7119 * guest shadow copy assignment to it.
7120 */
7121 uint8_t idxReg = pVar->idxReg;
7122 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7123 {
7124#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7125 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7126 {
7127# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7128 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7129 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7130# endif
7131 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7132 }
7133#endif
7134
7135 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7136 {
7137 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7138 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7139 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7140 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7141 }
7142 else
7143 {
7144 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7145 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7146 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7147 }
7148 /** @todo figure this one out. We need some way of making sure the register isn't
7149 * modified after this point, just in case we start writing crappy MC code. */
7150 pVar->enmGstReg = enmGstReg;
7151 pVar->fRegAcquired = true;
7152 return idxReg;
7153 }
7154 Assert(pVar->uArgNo == UINT8_MAX);
7155
7156 /*
7157 * Because this is supposed to be the commit stage, we just tag along with the
7158 * temporary register allocator and upgrade the register to a variable register.
7159 */
7160 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7161 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7162 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7163 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7164 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7165 pVar->idxReg = idxReg;
7166
7167 /*
7168 * Now we need to load the register value.
7169 */
7170 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7171 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7172 else
7173 {
7174 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7175 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7176 switch (pVar->cbVar)
7177 {
7178 case sizeof(uint64_t):
7179 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7180 break;
7181 case sizeof(uint32_t):
7182 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7183 break;
7184 case sizeof(uint16_t):
7185 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7186 break;
7187 case sizeof(uint8_t):
7188 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7189 break;
7190 default:
7191 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7192 }
7193 }
7194
7195 pVar->fRegAcquired = true;
7196 return idxReg;
7197}
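/* Illustration (a hypothetical sketch, not part of the recompiler): how a full guest
   register write is typically committed using the helper above; the wrapper and the
   elided store step are placeholders for whatever the caller actually emits. */
#if 0 /* illustration only */
static uint32_t iemNativeExampleCommitToGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                 uint8_t idxValueVar, IEMNATIVEGSTREG enmGstReg)
{
    /* Bind the variable to a host register that now shadows enmGstReg for a full write. */
    uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, enmGstReg, &off);

    /* ... the caller emits the store to CPUMCTX (or relies on delayed write-back)
       using idxVarReg as the up-to-date shadow of enmGstReg ... */
    RT_NOREF(idxVarReg);

    iemNativeVarRegisterRelease(pReNative, idxValueVar);
    return off;
}
#endif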
7198
7199
7200/**
7201 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7202 *
7203 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7204 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7205 * requirement of flushing anything in volatile host registers when making a
7206 * call.
7207 *
7208 * @returns New @a off value.
7209 * @param pReNative The recompiler state.
7210 * @param off The code buffer position.
7211 * @param fHstRegsNotToSave Set of registers not to save & restore.
7212 */
7213DECL_HIDDEN_THROW(uint32_t)
7214iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7215{
7216 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7217 if (fHstRegs)
7218 {
7219 do
7220 {
7221 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7222 fHstRegs &= ~RT_BIT_32(idxHstReg);
7223
7224 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7225 {
7226 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7228 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7229 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7230 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7231 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7232 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7233 {
7234 case kIemNativeVarKind_Stack:
7235 {
7236 /* Temporarily spill the variable register. */
7237 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7238 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7239 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7240 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7241 continue;
7242 }
7243
7244 case kIemNativeVarKind_Immediate:
7245 case kIemNativeVarKind_VarRef:
7246 case kIemNativeVarKind_GstRegRef:
7247 /* It is weird to have any of these loaded at this point. */
7248 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7249 continue;
7250
7251 case kIemNativeVarKind_End:
7252 case kIemNativeVarKind_Invalid:
7253 break;
7254 }
7255 AssertFailed();
7256 }
7257 else
7258 {
7259 /*
7260 * Allocate a temporary stack slot and spill the register to it.
7261 */
7262 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7263 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7264 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7265 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7266 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7267 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7268 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7269 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7270 }
7271 } while (fHstRegs);
7272 }
7273#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7274
7275 /*
7276 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7277 * which would be more difficult due to spanning multiple stack slots and different sizes
7278 * (besides, we only have a limited amount of slots at the moment).
7279 *
7280 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7281 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7282 */
7283 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7284
7285 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7286 if (fHstRegs)
7287 {
7288 do
7289 {
7290 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7291 fHstRegs &= ~RT_BIT_32(idxHstReg);
7292
7293 /* Fixed reserved and temporary registers don't need saving. */
7294 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7295 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7296 continue;
7297
7298 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7299
7300 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7301 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7302 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7303 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7304 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7305 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7306 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7307 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7308 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7309 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7310 {
7311 case kIemNativeVarKind_Stack:
7312 {
7313 /* Temporarily spill the variable register. */
7314 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7315 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7316 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7317 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7318 if (cbVar == sizeof(RTUINT128U))
7319 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7320 else
7321 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7322 continue;
7323 }
7324
7325 case kIemNativeVarKind_Immediate:
7326 case kIemNativeVarKind_VarRef:
7327 case kIemNativeVarKind_GstRegRef:
7328 /* It is weird to have any of these loaded at this point. */
7329 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7330 continue;
7331
7332 case kIemNativeVarKind_End:
7333 case kIemNativeVarKind_Invalid:
7334 break;
7335 }
7336 AssertFailed();
7337 } while (fHstRegs);
7338 }
7339#endif
7340 return off;
7341}
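/* Illustration (a hedged sketch, not part of the recompiler): the pairing of the
   save/restore helpers around a TLB-miss style helper call as described in the doc
   comment above; the actual call emission in the middle is elided because it depends
   on the caller. */
#if 0 /* illustration only */
static uint32_t iemNativeExampleHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Spill volatile registers holding live variables (no registers excluded here). */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);

    /* ... emit the actual helper call here ... */

    /* Reload the spilled variables and temporaries afterwards. */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
    return off;
}
#endif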
7342
7343
7344/**
7345 * Emit code to restore volatile registers after a call to a helper.
7346 *
7347 * @returns New @a off value.
7348 * @param pReNative The recompiler state.
7349 * @param off The code buffer position.
7350 * @param fHstRegsNotToSave Set of registers not to save & restore.
7351 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7352 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7353 */
7354DECL_HIDDEN_THROW(uint32_t)
7355iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7356{
7357 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7358 if (fHstRegs)
7359 {
7360 do
7361 {
7362 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7363 fHstRegs &= ~RT_BIT_32(idxHstReg);
7364
7365 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7366 {
7367 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7368 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7369 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7370 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7371 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7372 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7373 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7374 {
7375 case kIemNativeVarKind_Stack:
7376 {
7377 /* Unspill the variable register. */
7378 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7379 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7380 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7381 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7382 continue;
7383 }
7384
7385 case kIemNativeVarKind_Immediate:
7386 case kIemNativeVarKind_VarRef:
7387 case kIemNativeVarKind_GstRegRef:
7388 /* It is weird to have any of these loaded at this point. */
7389 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7390 continue;
7391
7392 case kIemNativeVarKind_End:
7393 case kIemNativeVarKind_Invalid:
7394 break;
7395 }
7396 AssertFailed();
7397 }
7398 else
7399 {
7400 /*
7401 * Restore from temporary stack slot.
7402 */
7403 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7404 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7405 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7406 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7407
7408 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7409 }
7410 } while (fHstRegs);
7411 }
7412#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7413 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7414 if (fHstRegs)
7415 {
7416 do
7417 {
7418 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7419 fHstRegs &= ~RT_BIT_32(idxHstReg);
7420
7421 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7422 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7423 continue;
7424 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7425
7426 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7428 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7429 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7430 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7431 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7432 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7433 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7434 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7435 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7436 {
7437 case kIemNativeVarKind_Stack:
7438 {
7439 /* Unspill the variable register. */
7440 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7441 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7442 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7443 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7444
7445 if (cbVar == sizeof(RTUINT128U))
7446 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7447 else
7448 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7449 continue;
7450 }
7451
7452 case kIemNativeVarKind_Immediate:
7453 case kIemNativeVarKind_VarRef:
7454 case kIemNativeVarKind_GstRegRef:
7455 /* It is weird to have any of these loaded at this point. */
7456 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7457 continue;
7458
7459 case kIemNativeVarKind_End:
7460 case kIemNativeVarKind_Invalid:
7461 break;
7462 }
7463 AssertFailed();
7464 } while (fHstRegs);
7465 }
7466#endif
7467 return off;
7468}
7469
7470
7471/**
7472 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7473 *
7474 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7475 *
7476 * ASSUMES that @a idxVar is valid and unpacked.
7477 */
7478DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7479{
7480 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7481 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7482 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7483 {
7484 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7485 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7486 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7487 Assert(cSlots > 0);
7488 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7489 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7490 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7491 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7492 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7493 }
7494 else
7495 Assert(idxStackSlot == UINT8_MAX);
7496}
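/* Worked example of the mask arithmetic above (illustration): a variable with
   cbVar = 32 (e.g. an RTUINT256U) occupies cSlots = (32 + 7) / 8 = 4 stack slots,
   so fAllocMask = RT_BIT_32(4) - 1 = 0xf and that nibble is cleared from bmStack
   starting at idxStackSlot. */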
7497
7498
7499/**
7500 * Worker that frees a single variable.
7501 *
7502 * ASSUMES that @a idxVar is valid and unpacked.
7503 */
7504DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7505{
7506 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7507 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7508 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7509
7510 /* Free the host register first if any assigned. */
7511 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7512#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7513 if ( idxHstReg != UINT8_MAX
7514 && pReNative->Core.aVars[idxVar].fSimdReg)
7515 {
7516 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7517 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7518 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7519 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7520 }
7521 else
7522#endif
7523 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7524 {
7525 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7526 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7527 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7528 }
7529
7530 /* Free argument mapping. */
7531 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7532 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7533 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7534
7535 /* Free the stack slots. */
7536 iemNativeVarFreeStackSlots(pReNative, idxVar);
7537
7538 /* Free the actual variable. */
7539 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7540 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7541}
7542
7543
7544/**
7545 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7546 */
7547DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7548{
7549 while (bmVars != 0)
7550 {
7551 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7552 bmVars &= ~RT_BIT_32(idxVar);
7553
7554#if 1 /** @todo optimize by simplifying this later... */
7555 iemNativeVarFreeOneWorker(pReNative, idxVar);
7556#else
7557 /* Only need to free the host register, the rest is done as bulk updates below. */
7558 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7559 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7560 {
7561 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7562 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7563 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7564 }
7565#endif
7566 }
7567#if 0 /** @todo optimize by simplifying this later... */
7568 pReNative->Core.bmVars = 0;
7569 pReNative->Core.bmStack = 0;
7570 pReNative->Core.u64ArgVars = UINT64_MAX;
7571#endif
7572}
7573
7574
7575
7576/*********************************************************************************************************************************
7577* Emitters for IEM_MC_CALL_CIMPL_XXX *
7578*********************************************************************************************************************************/
7579
7580/**
7581 * Emits code to load a reference to the given guest register into @a idxGprDst.
7582 */
7583DECL_HIDDEN_THROW(uint32_t)
7584iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7585 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7586{
7587#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7588 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
7589#endif
7590
7591 /*
7592 * Get the offset relative to the CPUMCTX structure.
7593 */
7594 uint32_t offCpumCtx;
7595 switch (enmClass)
7596 {
7597 case kIemNativeGstRegRef_Gpr:
7598 Assert(idxRegInClass < 16);
7599 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7600 break;
7601
7602 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7603 Assert(idxRegInClass < 4);
7604 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7605 break;
7606
7607 case kIemNativeGstRegRef_EFlags:
7608 Assert(idxRegInClass == 0);
7609 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7610 break;
7611
7612 case kIemNativeGstRegRef_MxCsr:
7613 Assert(idxRegInClass == 0);
7614 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7615 break;
7616
7617 case kIemNativeGstRegRef_FpuReg:
7618 Assert(idxRegInClass < 8);
7619 AssertFailed(); /** @todo what kind of indexing? */
7620 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7621 break;
7622
7623 case kIemNativeGstRegRef_MReg:
7624 Assert(idxRegInClass < 8);
7625 AssertFailed(); /** @todo what kind of indexing? */
7626 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7627 break;
7628
7629 case kIemNativeGstRegRef_XReg:
7630 Assert(idxRegInClass < 16);
7631 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7632 break;
7633
7634 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7635 Assert(idxRegInClass == 0);
7636 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7637 break;
7638
7639 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7640 Assert(idxRegInClass == 0);
7641 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7642 break;
7643
7644 default:
7645 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7646 }
7647
7648 /*
7649 * Load the value into the destination register.
7650 */
7651#ifdef RT_ARCH_AMD64
7652 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7653
7654#elif defined(RT_ARCH_ARM64)
7655 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7656 Assert(offCpumCtx < 4096);
7657 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7658
7659#else
7660# error "Port me!"
7661#endif
7662
7663 return off;
7664}
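/* Illustration: for kIemNativeGstRegRef_Gpr with idxRegInClass = 3 the emitted LEA
   yields &pVCpu->cpum.GstCtx.aGRegs[3], i.e. RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3])
   added to the CPUMCTX base (reached via the VMCPU pointer on AMD64, or via the
   fixed IEMNATIVE_REG_FIXED_PCPUMCTX register on ARM64). */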
7665
7666
7667/**
7668 * Common code for CIMPL and AIMPL calls.
7669 *
7670 * These are calls that use argument variables and such. They should not be
7671 * confused with internal calls required to implement an MC operation,
7672 * like a TLB load and similar.
7673 *
7674 * Upon return all that is left to do is to load any hidden arguments and
7675 * perform the call. All argument variables are freed.
7676 *
7677 * @returns New code buffer offset; throws VBox status code on error.
7678 * @param pReNative The native recompile state.
7679 * @param off The code buffer offset.
7680 * @param cArgs The total number of arguments (includes hidden
7681 * count).
7682 * @param cHiddenArgs The number of hidden arguments. The hidden
7683 * arguments must not have any variable declared for
7684 * them, whereas all the regular arguments must
7685 * (tstIEMCheckMc ensures this).
7686 * @param fFlushPendingWrites Whether to flush pending writes (default true). Pending
7687 * writes in call-volatile registers are still flushed even when false.
7688 */
7689DECL_HIDDEN_THROW(uint32_t)
7690iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
7691 bool fFlushPendingWrites /*= true*/)
7692{
7693#ifdef VBOX_STRICT
7694 /*
7695 * Assert sanity.
7696 */
7697 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7698 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7699 for (unsigned i = 0; i < cHiddenArgs; i++)
7700 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7701 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7702 {
7703 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7704 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7705 }
7706 iemNativeRegAssertSanity(pReNative);
7707#endif
7708
7709 /* We don't know what the called function makes use of, so flush any pending register writes. */
7710 RT_NOREF(fFlushPendingWrites);
7711#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7712 if (fFlushPendingWrites)
7713#endif
7714 off = iemNativeRegFlushPendingWrites(pReNative, off);
7715
7716 /*
7717 * Before we do anything else, go over variables that are referenced and
7718 * make sure they are not in a register.
7719 */
7720 uint32_t bmVars = pReNative->Core.bmVars;
7721 if (bmVars)
7722 {
7723 do
7724 {
7725 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7726 bmVars &= ~RT_BIT_32(idxVar);
7727
7728 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7729 {
7730 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7731#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7732 if ( idxRegOld != UINT8_MAX
7733 && pReNative->Core.aVars[idxVar].fSimdReg)
7734 {
7735 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7736 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
7737
7738 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7739 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7740 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7741 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7742 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
7743 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7744 else
7745 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7746
7747 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
7748 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
7749
7750 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7751 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
7752 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7753 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
7754 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
7755 }
7756 else
7757#endif
7758 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7759 {
7760 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7761 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7762 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7763 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7764 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7765
7766 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7767 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7768 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7769 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7770 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7771 }
7772 }
7773 } while (bmVars != 0);
7774#if 0 //def VBOX_STRICT
7775 iemNativeRegAssertSanity(pReNative);
7776#endif
7777 }
7778
7779 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7780
7781#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7782 /*
7783 * As the very first step, go over the host registers that will be used for arguments
7784 * and make sure they don't shadow anything which needs writing back first.
7785 */
7786 for (uint32_t i = 0; i < cRegArgs; i++)
7787 {
7788 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7789
7790 /* Writeback any dirty guest shadows before using this register. */
7791 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
7792 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
7793 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
7794 }
7795#endif
7796
7797 /*
7798 * First, go over the host registers that will be used for arguments and make
7799 * sure they either hold the desired argument or are free.
7800 */
7801 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7802 {
7803 for (uint32_t i = 0; i < cRegArgs; i++)
7804 {
7805 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7806 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7807 {
7808 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7809 {
7810 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7811 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7812 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7813 Assert(pVar->idxReg == idxArgReg);
7814 uint8_t const uArgNo = pVar->uArgNo;
7815 if (uArgNo == i)
7816 { /* perfect */ }
7817 /* The variable allocator logic should make sure this is impossible,
7818 except for when the return register is used as a parameter (ARM,
7819 but not x86). */
7820#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7821 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7822 {
7823# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7824# error "Implement this"
7825# endif
7826 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7827 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7828 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7829 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7830 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7831 }
7832#endif
7833 else
7834 {
7835 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7836
7837 if (pVar->enmKind == kIemNativeVarKind_Stack)
7838 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7839 else
7840 {
7841 /* just free it, can be reloaded if used again */
7842 pVar->idxReg = UINT8_MAX;
7843 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7844 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7845 }
7846 }
7847 }
7848 else
7849 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7850 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7851 }
7852 }
7853#if 0 //def VBOX_STRICT
7854 iemNativeRegAssertSanity(pReNative);
7855#endif
7856 }
7857
7858 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7859
7860#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7861 /*
7862 * If there are any stack arguments, make sure they are in their place as well.
7863 *
7864 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7865 * the caller) will be loading it later and it must be free (see first loop).
7866 */
7867 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7868 {
7869 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7870 {
7871 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7872 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7873 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7874 {
7875 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7876 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7877 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7878 pVar->idxReg = UINT8_MAX;
7879 }
7880 else
7881 {
7882 /* Use ARG0 as temp for stuff we need registers for. */
7883 switch (pVar->enmKind)
7884 {
7885 case kIemNativeVarKind_Stack:
7886 {
7887 uint8_t const idxStackSlot = pVar->idxStackSlot;
7888 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7889 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7890 iemNativeStackCalcBpDisp(idxStackSlot));
7891 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7892 continue;
7893 }
7894
7895 case kIemNativeVarKind_Immediate:
7896 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7897 continue;
7898
7899 case kIemNativeVarKind_VarRef:
7900 {
7901 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7902 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7903 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7904 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7905 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7906# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7907 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
7908 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
7909 if ( fSimdReg
7910 && idxRegOther != UINT8_MAX)
7911 {
7912 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7913 if (cbVar == sizeof(RTUINT128U))
7914 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
7915 else
7916 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
7917 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7918 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7919 }
7920 else
7921# endif
7922 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7923 {
7924 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7925 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7926 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7927 }
7928 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7929 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7930 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7931 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7932 continue;
7933 }
7934
7935 case kIemNativeVarKind_GstRegRef:
7936 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7937 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
7938 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7939 continue;
7940
7941 case kIemNativeVarKind_Invalid:
7942 case kIemNativeVarKind_End:
7943 break;
7944 }
7945 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7946 }
7947 }
7948# if 0 //def VBOX_STRICT
7949 iemNativeRegAssertSanity(pReNative);
7950# endif
7951 }
7952#else
7953 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7954#endif
7955
7956 /*
7957 * Make sure the argument variables are loaded into their respective registers.
7958 *
7959 * We can optimize this by ASSUMING that any register allocations are for
7960 * registers that have already been loaded and are ready. The previous step
7961 * saw to that.
7962 */
7963 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7964 {
7965 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7966 {
7967 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7968 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7969 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
7970 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
7971 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
7972 else
7973 {
7974 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7975 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7976 {
7977 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7978 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
7979 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
7980 | RT_BIT_32(idxArgReg);
7981 pVar->idxReg = idxArgReg;
7982 }
7983 else
7984 {
7985 /* Use ARG0 as temp for stuff we need registers for. */
7986 switch (pVar->enmKind)
7987 {
7988 case kIemNativeVarKind_Stack:
7989 {
7990 uint8_t const idxStackSlot = pVar->idxStackSlot;
7991 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7992 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
7993 continue;
7994 }
7995
7996 case kIemNativeVarKind_Immediate:
7997 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
7998 continue;
7999
8000 case kIemNativeVarKind_VarRef:
8001 {
8002 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8003 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8004 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8005 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8006 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8007 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8008#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8009 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8010 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8011 if ( fSimdReg
8012 && idxRegOther != UINT8_MAX)
8013 {
8014 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8015 if (cbVar == sizeof(RTUINT128U))
8016 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8017 else
8018 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8019 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8020 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8021 }
8022 else
8023#endif
8024 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8025 {
8026 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8027 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8028 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8029 }
8030 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8031 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8032 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8033 continue;
8034 }
8035
8036 case kIemNativeVarKind_GstRegRef:
8037 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8038 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8039 continue;
8040
8041 case kIemNativeVarKind_Invalid:
8042 case kIemNativeVarKind_End:
8043 break;
8044 }
8045 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8046 }
8047 }
8048 }
8049#if 0 //def VBOX_STRICT
8050 iemNativeRegAssertSanity(pReNative);
8051#endif
8052 }
8053#ifdef VBOX_STRICT
8054 else
8055 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8056 {
8057 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8058 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8059 }
8060#endif
8061
8062 /*
8063 * Free all argument variables (simplified).
8064 * Their lifetime always expires with the call they are for.
8065 */
8066 /** @todo Make the python script check that arguments aren't used after
8067 * IEM_MC_CALL_XXXX. */
8068 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8069 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8070 * an argument value. There is also some FPU stuff. */
8071 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8072 {
8073 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8074 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8075
8076 /* no need to free registers: */
8077 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8078 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8079 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8080 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8081 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8082 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8083
8084 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8085 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8086 iemNativeVarFreeStackSlots(pReNative, idxVar);
8087 }
8088 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8089
8090 /*
8091 * Flush volatile registers as we make the call.
8092 */
8093 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8094
8095 return off;
8096}
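/* Illustration (a hedged sketch, not part of the recompiler; the wrapper, the single
   hidden pVCpu argument and the commented-out call emitter are assumptions made purely
   for illustration): per the doc comment, after the common worker only the hidden
   argument(s) need loading before the call itself is emitted. */
#if 0 /* illustration only */
static uint32_t iemNativeExampleEmitHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                               uintptr_t pfnHelper, uint8_t cArgs)
{
    /* Marshal all argument variables into their call registers / stack slots and free them. */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + 1 /*incl. hidden arg*/, 1 /*cHiddenArgs*/);

    /* Load the assumed hidden first argument (pVCpu) and emit the call. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    /* off = iemNativeEmitCallImm(pReNative, off, pfnHelper); -- emitter name assumed */
    RT_NOREF(pfnHelper);
    return off;
}
#endif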
8097
8098
8099
8100/*********************************************************************************************************************************
8101* TLB Lookup. *
8102*********************************************************************************************************************************/
8103
8104/**
8105 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8106 */
8107DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8108{
8109 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8110 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8111 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8112 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8113
8114 /* Do the lookup manually. */
8115 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8116 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8117 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8118 if (RT_LIKELY(pTlbe->uTag == uTag))
8119 {
8120 /*
8121 * Check TLB page table level access flags.
8122 */
8123 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8124 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8125 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8126 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8127 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8128 | IEMTLBE_F_PG_UNASSIGNED
8129 | IEMTLBE_F_PT_NO_ACCESSED
8130 | fNoWriteNoDirty | fNoUser);
8131 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8132 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8133 {
8134 /*
8135 * Return the address.
8136 */
8137 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8138 if ((uintptr_t)pbAddr == uResult)
8139 return;
8140 RT_NOREF(cbMem);
8141 AssertFailed();
8142 }
8143 else
8144 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8145 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8146 }
8147 else
8148 AssertFailed();
8149 RT_BREAKPOINT();
8150}
8151
8152/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8153
8154
8155
8156/*********************************************************************************************************************************
8157* Recompiler Core. *
8158*********************************************************************************************************************************/
8159
8160/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8161static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8162{
8163 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8164 pDis->cbCachedInstr += cbMaxRead;
8165 RT_NOREF(cbMinRead);
8166 return VERR_NO_DATA;
8167}
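
/*
 * Note on the dummy above: the disassembly calls below hand DIS the already
 * prefetched opcode bytes, so this read-bytes callback should only be reached
 * if the disassembler wants more bytes than were supplied; zero filling and
 * returning VERR_NO_DATA presumably just makes that case fail gracefully
 * instead of reading beyond the supplied buffer.
 */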
8168
8169
8170DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8171{
8172 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8173 {
8174#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8175 ENTRY(fLocalForcedActions),
8176 ENTRY(iem.s.rcPassUp),
8177 ENTRY(iem.s.fExec),
8178 ENTRY(iem.s.pbInstrBuf),
8179 ENTRY(iem.s.uInstrBufPc),
8180 ENTRY(iem.s.GCPhysInstrBuf),
8181 ENTRY(iem.s.cbInstrBufTotal),
8182 ENTRY(iem.s.idxTbCurInstr),
8183#ifdef VBOX_WITH_STATISTICS
8184 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8185 ENTRY(iem.s.StatNativeTlbHitsForStore),
8186 ENTRY(iem.s.StatNativeTlbHitsForStack),
8187 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8188 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8189 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8190 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8191 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8192#endif
8193 ENTRY(iem.s.DataTlb.aEntries),
8194 ENTRY(iem.s.DataTlb.uTlbRevision),
8195 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8196 ENTRY(iem.s.DataTlb.cTlbHits),
8197 ENTRY(iem.s.CodeTlb.aEntries),
8198 ENTRY(iem.s.CodeTlb.uTlbRevision),
8199 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8200 ENTRY(iem.s.CodeTlb.cTlbHits),
8201 ENTRY(pVMR3),
8202 ENTRY(cpum.GstCtx.rax),
8203 ENTRY(cpum.GstCtx.ah),
8204 ENTRY(cpum.GstCtx.rcx),
8205 ENTRY(cpum.GstCtx.ch),
8206 ENTRY(cpum.GstCtx.rdx),
8207 ENTRY(cpum.GstCtx.dh),
8208 ENTRY(cpum.GstCtx.rbx),
8209 ENTRY(cpum.GstCtx.bh),
8210 ENTRY(cpum.GstCtx.rsp),
8211 ENTRY(cpum.GstCtx.rbp),
8212 ENTRY(cpum.GstCtx.rsi),
8213 ENTRY(cpum.GstCtx.rdi),
8214 ENTRY(cpum.GstCtx.r8),
8215 ENTRY(cpum.GstCtx.r9),
8216 ENTRY(cpum.GstCtx.r10),
8217 ENTRY(cpum.GstCtx.r11),
8218 ENTRY(cpum.GstCtx.r12),
8219 ENTRY(cpum.GstCtx.r13),
8220 ENTRY(cpum.GstCtx.r14),
8221 ENTRY(cpum.GstCtx.r15),
8222 ENTRY(cpum.GstCtx.es.Sel),
8223 ENTRY(cpum.GstCtx.es.u64Base),
8224 ENTRY(cpum.GstCtx.es.u32Limit),
8225 ENTRY(cpum.GstCtx.es.Attr),
8226 ENTRY(cpum.GstCtx.cs.Sel),
8227 ENTRY(cpum.GstCtx.cs.u64Base),
8228 ENTRY(cpum.GstCtx.cs.u32Limit),
8229 ENTRY(cpum.GstCtx.cs.Attr),
8230 ENTRY(cpum.GstCtx.ss.Sel),
8231 ENTRY(cpum.GstCtx.ss.u64Base),
8232 ENTRY(cpum.GstCtx.ss.u32Limit),
8233 ENTRY(cpum.GstCtx.ss.Attr),
8234 ENTRY(cpum.GstCtx.ds.Sel),
8235 ENTRY(cpum.GstCtx.ds.u64Base),
8236 ENTRY(cpum.GstCtx.ds.u32Limit),
8237 ENTRY(cpum.GstCtx.ds.Attr),
8238 ENTRY(cpum.GstCtx.fs.Sel),
8239 ENTRY(cpum.GstCtx.fs.u64Base),
8240 ENTRY(cpum.GstCtx.fs.u32Limit),
8241 ENTRY(cpum.GstCtx.fs.Attr),
8242 ENTRY(cpum.GstCtx.gs.Sel),
8243 ENTRY(cpum.GstCtx.gs.u64Base),
8244 ENTRY(cpum.GstCtx.gs.u32Limit),
8245 ENTRY(cpum.GstCtx.gs.Attr),
8246 ENTRY(cpum.GstCtx.rip),
8247 ENTRY(cpum.GstCtx.eflags),
8248 ENTRY(cpum.GstCtx.uRipInhibitInt),
8249 ENTRY(cpum.GstCtx.cr0),
8250 ENTRY(cpum.GstCtx.cr4),
8251 ENTRY(cpum.GstCtx.aXcr[0]),
8252 ENTRY(cpum.GstCtx.aXcr[1]),
8253#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8254 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8255 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8256 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8257 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8258 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8259 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8260 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8261 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8262 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8263 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8264 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8265 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8266 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8267 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8268 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8269 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8270 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8271 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8272 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8273 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8274 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8275 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8276 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8277 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8278 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8279 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8280 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8281 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8282 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8283 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8284 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8285 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8286#endif
8287#undef ENTRY
8288 };
8289#ifdef VBOX_STRICT
8290 static bool s_fOrderChecked = false;
8291 if (!s_fOrderChecked)
8292 {
8293 s_fOrderChecked = true;
8294 uint32_t offPrev = s_aMembers[0].off;
8295 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8296 {
8297 Assert(s_aMembers[i].off > offPrev);
8298 offPrev = s_aMembers[i].off;
8299 }
8300 }
8301#endif
8302
8303 /*
8304 * Binary lookup.
8305 */
8306 unsigned iStart = 0;
8307 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8308 for (;;)
8309 {
8310 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8311 uint32_t const offCur = s_aMembers[iCur].off;
8312 if (off < offCur)
8313 {
8314 if (iCur != iStart)
8315 iEnd = iCur;
8316 else
8317 break;
8318 }
8319 else if (off > offCur)
8320 {
8321 if (iCur + 1 < iEnd)
8322 iStart = iCur + 1;
8323 else
8324 break;
8325 }
8326 else
8327 return s_aMembers[iCur].pszName;
8328 }
8329#ifdef VBOX_WITH_STATISTICS
8330 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8331 return "iem.s.acThreadedFuncStats[iFn]";
8332#endif
8333 return NULL;
8334}
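
/*
 * Usage sketch (illustrative): given a displacement relative to the VMCPU
 * pointer, the table above resolves exact member offsets to names, e.g.
 *     iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip))
 * returns "cpum.GstCtx.rip".  Offsets landing inside acThreadedFuncStats map
 * to the catch-all "iem.s.acThreadedFuncStats[iFn]" string (statistics builds
 * only), and anything unknown yields NULL so the caller simply omits the
 * annotation.
 */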
8335
8336
8337DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8338{
8339 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8340#if defined(RT_ARCH_AMD64)
8341 static const char * const a_apszMarkers[] =
8342 {
8343 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8344 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8345 };
8346#endif
8347
8348 char szDisBuf[512];
8349 DISSTATE Dis;
8350 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8351 uint32_t const cNative = pTb->Native.cInstructions;
8352 uint32_t offNative = 0;
8353#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8354 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8355#endif
8356 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8357 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8358 : DISCPUMODE_64BIT;
8359#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8360 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8361#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8362 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8363#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8364# error "Port me"
8365#else
8366 csh hDisasm = ~(size_t)0;
8367# if defined(RT_ARCH_AMD64)
8368 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8369# elif defined(RT_ARCH_ARM64)
8370 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8371# else
8372# error "Port me"
8373# endif
8374 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8375
8376 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8377 //Assert(rcCs == CS_ERR_OK);
8378#endif
8379
8380 /*
8381 * Print TB info.
8382 */
8383 pHlp->pfnPrintf(pHlp,
8384 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8385 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8386 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8387 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8388#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8389 if (pDbgInfo && pDbgInfo->cEntries > 1)
8390 {
8391 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8392
8393 /*
8394 * This disassembly is driven by the debug info which follows the native
8395 * code and indicates where the next guest instruction starts, where
8396 * labels are and such things.
8397 */
8398 uint32_t idxThreadedCall = 0;
8399 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8400 uint8_t idxRange = UINT8_MAX;
8401 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8402 uint32_t offRange = 0;
8403 uint32_t offOpcodes = 0;
8404 uint32_t const cbOpcodes = pTb->cbOpcodes;
8405 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8406 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8407 uint32_t iDbgEntry = 1;
8408 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8409
8410 while (offNative < cNative)
8411 {
8412 /* If we're at or have passed the point where the next chunk of debug
8413 info starts, process it. */
8414 if (offDbgNativeNext <= offNative)
8415 {
8416 offDbgNativeNext = UINT32_MAX;
8417 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8418 {
8419 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8420 {
8421 case kIemTbDbgEntryType_GuestInstruction:
8422 {
8423 /* Did the exec flag change? */
8424 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8425 {
8426 pHlp->pfnPrintf(pHlp,
8427 " fExec change %#08x -> %#08x %s\n",
8428 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8429 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8430 szDisBuf, sizeof(szDisBuf)));
8431 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8432 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8433 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8434 : DISCPUMODE_64BIT;
8435 }
8436
8437 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8438 where the compilation was aborted before the opcode was recorded and the actual
8439 instruction was translated to a threaded call. This may happen when we run out
8440 of ranges, or when some complicated interrupts/FFs are found to be pending or
8441 similar. So, we just deal with it here rather than in the compiler code as it
8442 is a lot simpler to do here. */
8443 if ( idxRange == UINT8_MAX
8444 || idxRange >= cRanges
8445 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8446 {
8447 idxRange += 1;
8448 if (idxRange < cRanges)
8449 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8450 else
8451 continue;
8452 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8453 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8454 + (pTb->aRanges[idxRange].idxPhysPage == 0
8455 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8456 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8457 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8458 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8459 pTb->aRanges[idxRange].idxPhysPage);
8460 GCPhysPc += offRange;
8461 }
8462
8463 /* Disassemble the instruction. */
8464 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8465 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8466 uint32_t cbInstr = 1;
8467 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8468 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8469 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8470 if (RT_SUCCESS(rc))
8471 {
8472 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8473 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8474 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8475 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8476
8477 static unsigned const s_offMarker = 55;
8478 static char const s_szMarker[] = " ; <--- guest";
8479 if (cch < s_offMarker)
8480 {
8481 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8482 cch = s_offMarker;
8483 }
8484 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8485 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8486
8487 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8488 }
8489 else
8490 {
8491 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8492 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8493 cbInstr = 1;
8494 }
8495 GCPhysPc += cbInstr;
8496 offOpcodes += cbInstr;
8497 offRange += cbInstr;
8498 continue;
8499 }
8500
8501 case kIemTbDbgEntryType_ThreadedCall:
8502 pHlp->pfnPrintf(pHlp,
8503 " Call #%u to %s (%u args) - %s\n",
8504 idxThreadedCall,
8505 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8506 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8507 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8508 idxThreadedCall++;
8509 continue;
8510
8511 case kIemTbDbgEntryType_GuestRegShadowing:
8512 {
8513 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8514 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8515 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8516 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8517 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8518 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8519 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8520 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8521 else
8522 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8523 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8524 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8525 continue;
8526 }
8527
8528#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8529 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8530 {
8531 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8532 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8533 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8534 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8535 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8536 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8537 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8538 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8539 else
8540 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8541 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8542 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8543 continue;
8544 }
8545#endif
8546
8547 case kIemTbDbgEntryType_Label:
8548 {
8549 const char *pszName = "what_the_fudge";
8550 const char *pszComment = "";
8551 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8552 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8553 {
8554 case kIemNativeLabelType_Return: pszName = "Return"; break;
8555 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8556 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8557 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8558 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8559 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8560 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8561 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8562 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8563 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8564 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8565 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8566 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8567 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8568 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8569 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8570 case kIemNativeLabelType_If:
8571 pszName = "If";
8572 fNumbered = true;
8573 break;
8574 case kIemNativeLabelType_Else:
8575 pszName = "Else";
8576 fNumbered = true;
8577 pszComment = " ; regs state restored pre-if-block";
8578 break;
8579 case kIemNativeLabelType_Endif:
8580 pszName = "Endif";
8581 fNumbered = true;
8582 break;
8583 case kIemNativeLabelType_CheckIrq:
8584 pszName = "CheckIrq_CheckVM";
8585 fNumbered = true;
8586 break;
8587 case kIemNativeLabelType_TlbLookup:
8588 pszName = "TlbLookup";
8589 fNumbered = true;
8590 break;
8591 case kIemNativeLabelType_TlbMiss:
8592 pszName = "TlbMiss";
8593 fNumbered = true;
8594 break;
8595 case kIemNativeLabelType_TlbDone:
8596 pszName = "TlbDone";
8597 fNumbered = true;
8598 break;
8599 case kIemNativeLabelType_Invalid:
8600 case kIemNativeLabelType_End:
8601 break;
8602 }
8603 if (fNumbered)
8604 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8605 else
8606 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8607 continue;
8608 }
8609
8610 case kIemTbDbgEntryType_NativeOffset:
8611 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8612 Assert(offDbgNativeNext >= offNative);
8613 break;
8614
8615#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8616 case kIemTbDbgEntryType_DelayedPcUpdate:
8617 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8618 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8619 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8620 continue;
8621#endif
8622
8623#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8624 case kIemTbDbgEntryType_GuestRegDirty:
8625 {
8626 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8627 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
8628 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
8629 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
8630 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
8631 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
8632 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
8633 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
8634 pszGstReg, pszHstReg);
8635 continue;
8636 }
8637
8638 case kIemTbDbgEntryType_GuestRegWriteback:
8639 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
8640 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
8641 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);
8642 continue;
8643#endif
8644
8645 default:
8646 AssertFailed();
8647 }
8648 iDbgEntry++;
8649 break;
8650 }
8651 }
8652
8653 /*
8654 * Disassemble the next native instruction.
8655 */
8656 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8657# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8658 uint32_t cbInstr = sizeof(paNative[0]);
8659 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8660 if (RT_SUCCESS(rc))
8661 {
8662# if defined(RT_ARCH_AMD64)
8663 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8664 {
8665 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8666 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8667 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8668 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8669 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8670 uInfo & 0x8000 ? "recompiled" : "todo");
8671 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8672 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8673 else
8674 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8675 }
8676 else
8677# endif
8678 {
8679 const char *pszAnnotation = NULL;
8680# ifdef RT_ARCH_AMD64
8681 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8682 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8683 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8684 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8685 PCDISOPPARAM pMemOp;
8686 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8687 pMemOp = &Dis.Param1;
8688 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8689 pMemOp = &Dis.Param2;
8690 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8691 pMemOp = &Dis.Param3;
8692 else
8693 pMemOp = NULL;
8694 if ( pMemOp
8695 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8696 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8697 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8698 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8699
8700#elif defined(RT_ARCH_ARM64)
8701 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8702 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8703 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8704# else
8705# error "Port me"
8706# endif
8707 if (pszAnnotation)
8708 {
8709 static unsigned const s_offAnnotation = 55;
8710 size_t const cchAnnotation = strlen(pszAnnotation);
8711 size_t cchDis = strlen(szDisBuf);
8712 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8713 {
8714 if (cchDis < s_offAnnotation)
8715 {
8716 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8717 cchDis = s_offAnnotation;
8718 }
8719 szDisBuf[cchDis++] = ' ';
8720 szDisBuf[cchDis++] = ';';
8721 szDisBuf[cchDis++] = ' ';
8722 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8723 }
8724 }
8725 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8726 }
8727 }
8728 else
8729 {
8730# if defined(RT_ARCH_AMD64)
8731 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8732 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8733# elif defined(RT_ARCH_ARM64)
8734 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8735# else
8736# error "Port me"
8737# endif
8738 cbInstr = sizeof(paNative[0]);
8739 }
8740 offNative += cbInstr / sizeof(paNative[0]);
8741
8742# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8743 cs_insn *pInstr;
8744 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8745 (uintptr_t)pNativeCur, 1, &pInstr);
8746 if (cInstrs > 0)
8747 {
8748 Assert(cInstrs == 1);
8749 const char *pszAnnotation = NULL;
8750# if defined(RT_ARCH_ARM64)
8751 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8752 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8753 {
8754 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8755 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8756 char *psz = strchr(pInstr->op_str, '[');
8757 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8758 {
8759 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8760 int32_t off = -1;
8761 psz += 4;
8762 if (*psz == ']')
8763 off = 0;
8764 else if (*psz == ',')
8765 {
8766 psz = RTStrStripL(psz + 1);
8767 if (*psz == '#')
8768 off = RTStrToInt32(&psz[1]);
8769 /** @todo deal with index registers and LSL as well... */
8770 }
8771 if (off >= 0)
8772 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8773 }
8774 }
8775# endif
8776
8777 size_t const cchOp = strlen(pInstr->op_str);
8778# if defined(RT_ARCH_AMD64)
8779 if (pszAnnotation)
8780 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8781 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8782 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8783 else
8784 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8785 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8786
8787# else
8788 if (pszAnnotation)
8789 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8790 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8791 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8792 else
8793 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8794 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8795# endif
8796 offNative += pInstr->size / sizeof(*pNativeCur);
8797 cs_free(pInstr, cInstrs);
8798 }
8799 else
8800 {
8801# if defined(RT_ARCH_AMD64)
8802 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8803 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8804# else
8805 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8806# endif
8807 offNative++;
8808 }
8809# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8810 }
8811 }
8812 else
8813#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8814 {
8815 /*
8816 * No debug info, just disassemble the x86 code and then the native code.
8817 *
8818 * First the guest code:
8819 */
8820 for (unsigned i = 0; i < pTb->cRanges; i++)
8821 {
8822 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8823 + (pTb->aRanges[i].idxPhysPage == 0
8824 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8825 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8826 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8827 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8828 unsigned off = pTb->aRanges[i].offOpcodes;
8829 /** @todo this ain't working when crossing pages! */
8830 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8831 while (off < cbOpcodes)
8832 {
8833 uint32_t cbInstr = 1;
8834 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8835 &pTb->pabOpcodes[off], cbOpcodes - off,
8836 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8837 if (RT_SUCCESS(rc))
8838 {
8839 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8840 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8841 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8842 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8843 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8844 GCPhysPc += cbInstr;
8845 off += cbInstr;
8846 }
8847 else
8848 {
8849 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8850 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8851 break;
8852 }
8853 }
8854 }
8855
8856 /*
8857 * Then the native code:
8858 */
8859 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8860 while (offNative < cNative)
8861 {
8862 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8863# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8864 uint32_t cbInstr = sizeof(paNative[0]);
8865 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8866 if (RT_SUCCESS(rc))
8867 {
8868# if defined(RT_ARCH_AMD64)
8869 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8870 {
8871 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8872 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8873 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8874 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8875 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8876 uInfo & 0x8000 ? "recompiled" : "todo");
8877 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8878 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8879 else
8880 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8881 }
8882 else
8883# endif
8884 {
8885# ifdef RT_ARCH_AMD64
8886 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8887 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8888 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8889 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8890# elif defined(RT_ARCH_ARM64)
8891 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8892 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8893 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8894# else
8895# error "Port me"
8896# endif
8897 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8898 }
8899 }
8900 else
8901 {
8902# if defined(RT_ARCH_AMD64)
8903 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8904 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8905# else
8906 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8907# endif
8908 cbInstr = sizeof(paNative[0]);
8909 }
8910 offNative += cbInstr / sizeof(paNative[0]);
8911
8912# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8913 cs_insn *pInstr;
8914 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8915 (uintptr_t)pNativeCur, 1, &pInstr);
8916 if (cInstrs > 0)
8917 {
8918 Assert(cInstrs == 1);
8919# if defined(RT_ARCH_AMD64)
8920 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8921 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8922# else
8923 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8924 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8925# endif
8926 offNative += pInstr->size / sizeof(*pNativeCur);
8927 cs_free(pInstr, cInstrs);
8928 }
8929 else
8930 {
8931# if defined(RT_ARCH_AMD64)
8932 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8933 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8934# else
8935 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8936# endif
8937 offNative++;
8938 }
8939# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8940 }
8941 }
8942
8943#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8944 /* Cleanup. */
8945 cs_close(&hDisasm);
8946#endif
8947}
8948
8949
8950/**
8951 * Recompiles the given threaded TB into a native one.
8952 *
8953 * In case of failure the translation block will be returned as-is.
8954 *
8955 * @returns pTb.
8956 * @param pVCpu The cross context virtual CPU structure of the calling
8957 * thread.
8958 * @param pTb The threaded translation block to recompile to native.
8959 */
8960DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8961{
8962 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
8963
8964 /*
8965 * The first time thru, we allocate the recompiler state, the other times
8966 * we just need to reset it before using it again.
8967 */
8968 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
8969 if (RT_LIKELY(pReNative))
8970 iemNativeReInit(pReNative, pTb);
8971 else
8972 {
8973 pReNative = iemNativeInit(pVCpu, pTb);
8974 AssertReturn(pReNative, pTb);
8975 }
8976
8977#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8978 /*
8979 * First do liveness analysis. This is done backwards.
8980 */
8981 {
8982 uint32_t idxCall = pTb->Thrd.cCalls;
8983 if (idxCall <= pReNative->cLivenessEntriesAlloc)
8984 { /* likely */ }
8985 else
8986 {
8987 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
8988 while (idxCall > cAlloc)
8989 cAlloc *= 2;
8990 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
8991 AssertReturn(pvNew, pTb);
8992 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
8993 pReNative->cLivenessEntriesAlloc = cAlloc;
8994 }
8995 AssertReturn(idxCall > 0, pTb);
8996 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
8997
8998 /* The initial (final) entry. */
8999 idxCall--;
9000 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9001
9002 /* Loop backwards thru the calls and fill in the other entries. */
9003 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9004 while (idxCall > 0)
9005 {
9006 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9007 if (pfnLiveness)
9008 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9009 else
9010 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9011 pCallEntry--;
9012 idxCall--;
9013 }
9014
9015# ifdef VBOX_WITH_STATISTICS
9016 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9017 to 'clobbered' rather than 'input'. */
9018 /** @todo */
9019# endif
9020 }
9021#endif
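
/*
 * A rough picture of the pass above (sketch): the entry for the final call is
 * initialized as all-unused, and walking backwards each call either has a
 * dedicated liveness function deriving entry[idxCall - 1] from entry[idxCall],
 * or falls back to the conservative exception/call initializer.  The forward
 * recompilation loop below can then consult paLivenessEntries[idxCurCall] to
 * see how later calls use each guest register, presumably letting the
 * allocator skip work for values that are never read again.
 */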
9022
9023 /*
9024 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9025 * for aborting if an error happens.
9026 */
9027 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9028#ifdef LOG_ENABLED
9029 uint32_t const cCallsOrg = cCallsLeft;
9030#endif
9031 uint32_t off = 0;
9032 int rc = VINF_SUCCESS;
9033 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9034 {
9035 /*
9036 * Emit prolog code (fixed).
9037 */
9038 off = iemNativeEmitProlog(pReNative, off);
9039
9040 /*
9041 * Convert the calls to native code.
9042 */
9043#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9044 int32_t iGstInstr = -1;
9045#endif
9046#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9047 uint32_t cThreadedCalls = 0;
9048 uint32_t cRecompiledCalls = 0;
9049#endif
9050#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9051 uint32_t idxCurCall = 0;
9052#endif
9053 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9054 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9055 while (cCallsLeft-- > 0)
9056 {
9057 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9058#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9059 pReNative->idxCurCall = idxCurCall;
9060#endif
9061
9062 /*
9063 * Debug info, assembly markup and statistics.
9064 */
9065#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9066 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9067 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9068#endif
9069#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9070 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9071 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9072 {
9073 if (iGstInstr < (int32_t)pTb->cInstructions)
9074 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9075 else
9076 Assert(iGstInstr == pTb->cInstructions);
9077 iGstInstr = pCallEntry->idxInstr;
9078 }
9079 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9080#endif
9081#if defined(VBOX_STRICT)
9082 off = iemNativeEmitMarker(pReNative, off,
9083 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9084#endif
9085#if defined(VBOX_STRICT)
9086 iemNativeRegAssertSanity(pReNative);
9087#endif
9088#ifdef VBOX_WITH_STATISTICS
9089 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9090#endif
9091
9092 /*
9093 * Actual work.
9094 */
9095 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9096 pfnRecom ? "(recompiled)" : "(todo)"));
9097 if (pfnRecom) /** @todo stats on this. */
9098 {
9099 off = pfnRecom(pReNative, off, pCallEntry);
9100 STAM_REL_STATS({cRecompiledCalls++;});
9101 }
9102 else
9103 {
9104 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9105 STAM_REL_STATS({cThreadedCalls++;});
9106 }
9107 Assert(off <= pReNative->cInstrBufAlloc);
9108 Assert(pReNative->cCondDepth == 0);
9109
9110#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9111 if (LogIs2Enabled())
9112 {
9113 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9114# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9115 static const char s_achState[] = "CUXI";
9116# else
9117 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9118# endif
9119
9120 char szGpr[17];
9121 for (unsigned i = 0; i < 16; i++)
9122 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9123 szGpr[16] = '\0';
9124
9125 char szSegBase[X86_SREG_COUNT + 1];
9126 char szSegLimit[X86_SREG_COUNT + 1];
9127 char szSegAttrib[X86_SREG_COUNT + 1];
9128 char szSegSel[X86_SREG_COUNT + 1];
9129 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9130 {
9131 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9132 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9133 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9134 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9135 }
9136 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9137 = szSegSel[X86_SREG_COUNT] = '\0';
9138
9139 char szEFlags[8];
9140 for (unsigned i = 0; i < 7; i++)
9141 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9142 szEFlags[7] = '\0';
9143
9144 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9145 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9146 }
9147#endif
9148
9149 /*
9150 * Advance.
9151 */
9152 pCallEntry++;
9153#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9154 idxCurCall++;
9155#endif
9156 }
9157
9158 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9159 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9160 if (!cThreadedCalls)
9161 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9162
9163 /*
9164 * Emit the epilog code.
9165 */
9166 uint32_t idxReturnLabel;
9167 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9168
9169 /*
9170 * Generate special jump labels.
9171 */
9172 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9173 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9174 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9175 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9176
9177 /*
9178 * Generate simple TB tail labels that just call a helper with a pVCpu
9179 * arg and either return or longjmp/throw a non-zero status.
9180 *
9181 * The array entries must be ordered by enmLabel value so we can index
9182 * using fTailLabels bit numbers.
9183 */
9184 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9185 static struct
9186 {
9187 IEMNATIVELABELTYPE enmLabel;
9188 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9189 } const g_aSimpleTailLabels[] =
9190 {
9191 { kIemNativeLabelType_Invalid, NULL },
9192 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9193 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9194 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9195 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9196 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9197 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9198 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9199 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9200 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9201 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9202 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9203 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9204 };
9205 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9206 AssertCompile(kIemNativeLabelType_Invalid == 0);
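/* Worked example (illustrative): if the TB only used the RaiseGp0 label, the
   fTailLabels mask below has just that bit set.  ASMBitFirstSetU64 returns the
   1-based bit position, hence the '- 1U' to recover the enum value, which then
   indexes g_aSimpleTailLabels directly thanks to the ordering asserted above.
   The '- 2U' in the mask computation clears bit 0 (kIemNativeLabelType_Invalid)
   while keeping bits 1..kIemNativeLabelType_LastSimple. */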
9207 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9208 if (fTailLabels)
9209 {
9210 do
9211 {
9212 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9213 fTailLabels &= ~RT_BIT_64(enmLabel);
9214 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9215
9216 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9217 Assert(idxLabel != UINT32_MAX);
9218 if (idxLabel != UINT32_MAX)
9219 {
9220 iemNativeLabelDefine(pReNative, idxLabel, off);
9221
9222 /* int pfnCallback(PVMCPUCC pVCpu) */
9223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9224 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9225
9226 /* jump back to the return sequence. */
9227 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9228 }
9229
9230 } while (fTailLabels);
9231 }
9232 }
9233 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9234 {
9235 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9236 return pTb;
9237 }
9238 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9239 Assert(off <= pReNative->cInstrBufAlloc);
9240
9241 /*
9242 * Make sure all labels have been defined.
9243 */
9244 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9245#ifdef VBOX_STRICT
9246 uint32_t const cLabels = pReNative->cLabels;
9247 for (uint32_t i = 0; i < cLabels; i++)
9248 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9249#endif
9250
9251 /*
9252 * Allocate executable memory, copy over the code we've generated.
9253 */
9254 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9255 if (pTbAllocator->pDelayedFreeHead)
9256 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9257
9258 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
9259 AssertReturn(paFinalInstrBuf, pTb);
9260 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9261
9262 /*
9263 * Apply fixups.
9264 */
9265 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9266 uint32_t const cFixups = pReNative->cFixups;
9267 for (uint32_t i = 0; i < cFixups; i++)
9268 {
9269 Assert(paFixups[i].off < off);
9270 Assert(paFixups[i].idxLabel < cLabels);
9271 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9272 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9273 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9274 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9275 switch (paFixups[i].enmType)
9276 {
9277#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9278 case kIemNativeFixupType_Rel32:
9279 Assert(paFixups[i].off + 4 <= off);
9280 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9281 continue;
9282
9283#elif defined(RT_ARCH_ARM64)
9284 case kIemNativeFixupType_RelImm26At0:
9285 {
9286 Assert(paFixups[i].off < off);
9287 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9288 Assert(offDisp >= -262144 && offDisp < 262144);
9289 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9290 continue;
9291 }
9292
9293 case kIemNativeFixupType_RelImm19At5:
9294 {
9295 Assert(paFixups[i].off < off);
9296 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9297 Assert(offDisp >= -262144 && offDisp < 262144);
9298 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9299 continue;
9300 }
9301
9302 case kIemNativeFixupType_RelImm14At5:
9303 {
9304 Assert(paFixups[i].off < off);
9305 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9306 Assert(offDisp >= -8192 && offDisp < 8192);
9307 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9308 continue;
9309 }
9310
9311#endif
9312 case kIemNativeFixupType_Invalid:
9313 case kIemNativeFixupType_End:
9314 break;
9315 }
9316 AssertFailed();
9317 }
9318
9319 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9320 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9321
9322 /*
9323 * Convert the translation block.
9324 */
9325 RTMemFree(pTb->Thrd.paCalls);
9326 pTb->Native.paInstructions = paFinalInstrBuf;
9327 pTb->Native.cInstructions = off;
9328 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9329#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9330 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9331 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9332#endif
9333
9334 Assert(pTbAllocator->cThreadedTbs > 0);
9335 pTbAllocator->cThreadedTbs -= 1;
9336 pTbAllocator->cNativeTbs += 1;
9337 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9338
9339#ifdef LOG_ENABLED
9340 /*
9341 * Disassemble to the log if enabled.
9342 */
9343 if (LogIs3Enabled())
9344 {
9345 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9346 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9347# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9348 RTLogFlush(NULL);
9349# endif
9350 }
9351#endif
9352 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9353
9354 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9355 return pTb;
9356}
9357