VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@104281

Last change on this file was 104281, checked in by vboxsync, 8 months ago

VMM/IEM: Gather statistics on the exit behavior of native TBs, bugref:10653

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 412.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104281 2024-04-10 17:21:10Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
70# include "/opt/local/include/capstone/capstone.h"
71#endif
72
73#include "IEMInline.h"
74#include "IEMThreadedFunctions.h"
75#include "IEMN8veRecompiler.h"
76#include "IEMN8veRecompilerEmit.h"
77#include "IEMN8veRecompilerTlbLookup.h"
78#include "IEMNativeFunctions.h"
79
80
81/*
82 * Narrow down configs here to avoid wasting time on unused configs.
83 * Note! Same checks in IEMAllThrdRecompiler.cpp.
84 */
85
86#ifndef IEM_WITH_CODE_TLB
87# error The code TLB must be enabled for the recompiler.
88#endif
89
90#ifndef IEM_WITH_DATA_TLB
91# error The data TLB must be enabled for the recompiler.
92#endif
93
94#ifndef IEM_WITH_SETJMP
95# error The setjmp approach must be enabled for the recompiler.
96#endif
97
98/** @todo eliminate this clang build hack. */
99#if RT_CLANG_PREREQ(4, 0)
100# pragma GCC diagnostic ignored "-Wunused-function"
101#endif
102
103
104/*********************************************************************************************************************************
105* Internal Functions *
106*********************************************************************************************************************************/
107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
108static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
109#endif
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
112 IEMNATIVEGSTREG enmGstReg, uint32_t off);
113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
130
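/*
 * Note, added for clarity (not part of the original source): the check above
 * means a TB that merely wants to break out of the execution loop reports
 * VINF_SUCCESS after the usual status code fiddling, while any other status
 * (or a pending rcPassUp) presumably gets the same iemExecStatusCodeFiddling
 * treatment as the threaded/interpreted execution paths.
 */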
131
132/**
133 * Used by TB code when it wants to raise a \#DE.
134 */
135IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
136{
137 iemRaiseDivideErrorJmp(pVCpu);
138#ifndef _MSC_VER
139 return VINF_IEM_RAISED_XCPT; /* not reached */
140#endif
141}
142
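/*
 * Note, added for clarity (not part of the original source): the iemRaise*Jmp
 * workers exit via longjmp and never return, so the return statement above only
 * exists to keep non-MSVC compilers from warning about a missing return value;
 * MSVC would presumably flag it as unreachable code instead, hence the #ifndef.
 */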
143
144/**
145 * Used by TB code when it wants to raise a \#UD.
146 */
147IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
148{
149 iemRaiseUndefinedOpcodeJmp(pVCpu);
150#ifndef _MSC_VER
151 return VINF_IEM_RAISED_XCPT; /* not reached */
152#endif
153}
154
155
156/**
157 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
158 *
159 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
160 */
161IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
162{
163 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
164 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
165 iemRaiseUndefinedOpcodeJmp(pVCpu);
166 else
167 iemRaiseDeviceNotAvailableJmp(pVCpu);
168#ifndef _MSC_VER
169 return VINF_IEM_RAISED_XCPT; /* not reached */
170#endif
171}
172
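/*
 * Note, added for clarity (not part of the original source): per
 * IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT, CR0.EM set or CR4.OSFXSR clear yields
 * \#UD, otherwise the fallback is \#NM (presumably reached when CR0.TS is set).
 */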
173
174/**
175 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
176 *
177 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
178 */
179IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
180{
181 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
182 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
183 iemRaiseUndefinedOpcodeJmp(pVCpu);
184 else
185 iemRaiseDeviceNotAvailableJmp(pVCpu);
186#ifndef _MSC_VER
187 return VINF_IEM_RAISED_XCPT; /* not reached */
188#endif
189}
190
191
192/**
193 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
194 *
195 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
196 */
197IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
198{
199 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
200 iemRaiseSimdFpExceptionJmp(pVCpu);
201 else
202 iemRaiseUndefinedOpcodeJmp(pVCpu);
203#ifndef _MSC_VER
204 return VINF_IEM_RAISED_XCPT; /* not reached */
205#endif
206}
207
208
209/**
210 * Used by TB code when it wants to raise a \#NM.
211 */
212IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
213{
214 iemRaiseDeviceNotAvailableJmp(pVCpu);
215#ifndef _MSC_VER
216 return VINF_IEM_RAISED_XCPT; /* not reached */
217#endif
218}
219
220
221/**
222 * Used by TB code when it wants to raise a \#GP(0).
223 */
224IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
225{
226 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
227#ifndef _MSC_VER
228 return VINF_IEM_RAISED_XCPT; /* not reached */
229#endif
230}
231
232
233/**
234 * Used by TB code when it wants to raise a \#MF.
235 */
236IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
237{
238 iemRaiseMathFaultJmp(pVCpu);
239#ifndef _MSC_VER
240 return VINF_IEM_RAISED_XCPT; /* not reached */
241#endif
242}
243
244
245/**
246 * Used by TB code when it wants to raise a \#XF.
247 */
248IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
249{
250 iemRaiseSimdFpExceptionJmp(pVCpu);
251#ifndef _MSC_VER
252 return VINF_IEM_RAISED_XCPT; /* not reached */
253#endif
254}
255
256
257/**
258 * Used by TB code when detecting opcode changes.
259 * @see iemThreadedFuncWorkerObsoleteTb
260 */
261IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
262{
263 /* We set fSafeToFree to false because we're being called in the context
264 of a TB callback function, which for native TBs means we cannot release
265 the executable memory until we've returned all the way back to iemTbExec,
266 as that return path goes via the native code generated for the TB. */
267 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
268 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
269 return VINF_IEM_REEXEC_BREAK;
270}
271
272
273/**
274 * Used by TB code when we need to switch to a TB with CS.LIM checking.
275 */
276IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
277{
278 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
279 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
280 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
281 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
282 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
283 return VINF_IEM_REEXEC_BREAK;
284}
285
286
287/**
288 * Used by TB code when we missed a PC check after a branch.
289 */
290IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
291{
292 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
293 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
294 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
295 pVCpu->iem.s.pbInstrBuf));
296 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
297 return VINF_IEM_REEXEC_BREAK;
298}
299
300
301
302/*********************************************************************************************************************************
303* Helpers: Segmented memory fetches and stores. *
304*********************************************************************************************************************************/
305
306/**
307 * Used by TB code to load unsigned 8-bit data w/ segmentation.
308 */
309IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
310{
311#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
312 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
313#else
314 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
315#endif
316}
317
318
319/**
320 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
321 * to 16 bits.
322 */
323IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
324{
325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
326 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
327#else
328 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
329#endif
330}
331
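#if 0
/*
 * Illustrative sketch, not part of the original source: the cast chains in the
 * _Sx_ helpers first sign-extend to the target width and then zero-extend to
 * 64 bits so the caller always gets a fully defined host register value.  For
 * example, a fetched byte of 0x80 becomes 0xff80 as uint16_t and is returned
 * as 0x000000000000ff80.  The function name below is made up for illustration.
 */
static uint64_t iemNativeExampleSignExtendU8ToU16(uint8_t bValue)
{
    return (uint64_t)(uint16_t)(int16_t)(int8_t)bValue;
}
#endif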
332
333/**
334 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
335 * to 32 bits.
336 */
337IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
338{
339#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
340 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
341#else
342 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
343#endif
344}
345
346/**
347 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
348 * to 64 bits.
349 */
350IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
351{
352#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
353 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
354#else
355 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
356#endif
357}
358
359
360/**
361 * Used by TB code to load unsigned 16-bit data w/ segmentation.
362 */
363IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
364{
365#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
366 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
367#else
368 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
369#endif
370}
371
372
373/**
374 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
375 * to 32 bits.
376 */
377IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
378{
379#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
380 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
381#else
382 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
383#endif
384}
385
386
387/**
388 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
389 * to 64 bits.
390 */
391IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
392{
393#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
394 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
395#else
396 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
397#endif
398}
399
400
401/**
402 * Used by TB code to load unsigned 32-bit data w/ segmentation.
403 */
404IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
405{
406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
407 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
408#else
409 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
410#endif
411}
412
413
414/**
415 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
416 * to 64 bits.
417 */
418IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
419{
420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
421 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
422#else
423 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
424#endif
425}
426
427
428/**
429 * Used by TB code to load unsigned 64-bit data w/ segmentation.
430 */
431IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
432{
433#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
434 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
435#else
436 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
437#endif
438}
439
440
441#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
442/**
443 * Used by TB code to load 128-bit data w/ segmentation.
444 */
445IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
446{
447#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
448 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
449#else
450 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
451#endif
452}
453
454
455/**
456 * Used by TB code to load 128-bit data w/ segmentation, SSE aligned.
457 */
458IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
459{
460#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
461 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
462#else
463 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
464#endif
465}
466
467
468/**
469 * Used by TB code to load 128-bit data w/ segmentation, no alignment check.
470 */
471IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
472{
473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
474 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
475#else
476 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
477#endif
478}
479
480
481/**
482 * Used by TB code to load 256-bit data w/ segmentation, no alignment check.
483 */
484IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
485{
486#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
487 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
488#else
489 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
490#endif
491}
492
493
494/**
495 * Used by TB code to load 256-bit data w/ segmentation, AVX aligned.
496 */
497IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
498{
499#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
500 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
501#else
502 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
503#endif
504}
505#endif
506
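/*
 * Note, added for clarity (not part of the original source): the 128-bit and
 * 256-bit fetch helpers above return their result through a pointer parameter
 * rather than a return value, presumably because the data does not fit in a
 * single host return register.
 */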
507
508/**
509 * Used by TB code to store unsigned 8-bit data w/ segmentation.
510 */
511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
512{
513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
514 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
515#else
516 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
517#endif
518}
519
520
521/**
522 * Used by TB code to store unsigned 16-bit data w/ segmentation.
523 */
524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
525{
526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
527 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
528#else
529 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
530#endif
531}
532
533
534/**
535 * Used by TB code to store unsigned 32-bit data w/ segmentation.
536 */
537IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
538{
539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
540 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
541#else
542 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
543#endif
544}
545
546
547/**
548 * Used by TB code to store unsigned 64-bit data w/ segmentation.
549 */
550IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
551{
552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
553 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
554#else
555 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
556#endif
557}
558
559
560#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
561/**
562 * Used by TB code to store unsigned 128-bit data w/ segmentation, SSE aligned.
563 */
564IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
565{
566#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
567 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
568#else
569 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
570#endif
571}
572
573
574/**
575 * Used by TB code to store unsigned 128-bit data w/ segmentation, no alignment check.
576 */
577IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
578{
579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
580 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
581#else
582 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
583#endif
584}
585
586
587/**
588 * Used by TB code to store unsigned 256-bit data w/ segmentation, no alignment check.
589 */
590IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
591{
592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
593 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
594#else
595 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
596#endif
597}
598
599
600/**
601 * Used by TB code to store unsigned 256-bit data w/ segmentation, AVX aligned.
602 */
603IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
604{
605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
606 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
607#else
608 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
609#endif
610}
611#endif
612
613
614
615/**
616 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
617 */
618IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
619{
620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
621 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
622#else
623 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
624#endif
625}
626
627
628/**
629 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
630 */
631IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
632{
633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
634 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
635#else
636 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
637#endif
638}
639
640
641/**
642 * Used by TB code to store a 32-bit selector value onto a generic stack.
643 *
644 * Intel CPUs don't write the whole dword, hence this special function.
645 */
646IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
647{
648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
649 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
650#else
651 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
652#endif
653}
654
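/*
 * Note, added for clarity (not part of the original source): with a 32-bit
 * operand size, Intel hardware pushing a segment register only writes the low
 * 16 bits of the stack slot.  So, for example, pushing CS=0x0008 over a dword
 * containing 0xdeadbeef would leave 0xdead0008 in memory, which is why a
 * dedicated SReg store worker is used instead of a plain 32-bit store.
 */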
655
656/**
657 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
658 */
659IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
660{
661#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
662 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
663#else
664 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
665#endif
666}
667
668
669/**
670 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
671 */
672IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
673{
674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
675 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
676#else
677 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
678#endif
679}
680
681
682/**
683 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
684 */
685IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
686{
687#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
688 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
689#else
690 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
691#endif
692}
693
694
695/**
696 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
697 */
698IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
699{
700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
701 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
702#else
703 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
704#endif
705}
706
707
708
709/*********************************************************************************************************************************
710* Helpers: Flat memory fetches and stores. *
711*********************************************************************************************************************************/
712
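/*
 * Note, added for clarity (not part of the original source): when the TLB
 * lookup configuration is enabled, these flat helpers reuse the segmented
 * *SafeJmp workers and pass UINT8_MAX as the segment register index, which is
 * presumably the convention for flat (no segment) addressing; otherwise they
 * call the dedicated iemMemFlat*Jmp workers.
 */
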
713/**
714 * Used by TB code to load unsigned 8-bit data w/ flat address.
715 * @note Zero extending the value to 64-bit to simplify assembly.
716 */
717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
721#else
722 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
729 * to 16 bits.
730 * @note Zero extending the value to 64-bit to simplify assembly.
731 */
732IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
733{
734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
735 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
736#else
737 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
738#endif
739}
740
741
742/**
743 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
744 * to 32 bits.
745 * @note Zero extending the value to 64-bit to simplify assembly.
746 */
747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
748{
749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
750 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
751#else
752 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
753#endif
754}
755
756
757/**
758 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
759 * to 64 bits.
760 */
761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
762{
763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
764 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
765#else
766 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
767#endif
768}
769
770
771/**
772 * Used by TB code to load unsigned 16-bit data w/ flat address.
773 * @note Zero extending the value to 64-bit to simplify assembly.
774 */
775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
776{
777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
778 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
779#else
780 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
781#endif
782}
783
784
785/**
786 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
787 * to 32 bits.
788 * @note Zero extending the value to 64-bit to simplify assembly.
789 */
790IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
791{
792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
793 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
794#else
795 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
796#endif
797}
798
799
800/**
801 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
802 * to 64 bits.
803 * @note The value is sign extended all the way to 64 bits, so no separate zero extension is needed.
804 */
805IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
806{
807#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
808 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
809#else
810 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
811#endif
812}
813
814
815/**
816 * Used by TB code to load unsigned 32-bit data w/ flat address.
817 * @note Zero extending the value to 64-bit to simplify assembly.
818 */
819IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
820{
821#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
822 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
823#else
824 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
825#endif
826}
827
828
829/**
830 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
831 * to 64 bits.
832 * @note The value is sign extended all the way to 64 bits, so no separate zero extension is needed.
833 */
834IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
835{
836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
837 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
838#else
839 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
840#endif
841}
842
843
844/**
845 * Used by TB code to load unsigned 64-bit data w/ flat address.
846 */
847IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
848{
849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
850 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
851#else
852 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
853#endif
854}
855
856
857#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
858/**
859 * Used by TB code to load unsigned 128-bit data w/ flat address.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
864 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
865#else
866 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
867#endif
868}
869
870
871/**
872 * Used by TB code to load unsigned 128-bit data w/ flat address, SSE aligned.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
877 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
878#else
879 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
880#endif
881}
882
883
884/**
885 * Used by TB code to load unsigned 128-bit data w/ flat address, no alignment check.
886 */
887IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
888{
889#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
890 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
891#else
892 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
893#endif
894}
895
896
897/**
898 * Used by TB code to load unsigned 256-bit data w/ flat address, no alignment check.
899 */
900IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
901{
902#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
903 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
904#else
905 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
906#endif
907}
908
909
910/**
911 * Used by TB code to load unsigned 256-bit data w/ flat address, AVX aligned.
912 */
913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
914{
915#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
916 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
917#else
918 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
919#endif
920}
921#endif
922
923
924/**
925 * Used by TB code to store unsigned 8-bit data w/ flat address.
926 */
927IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
928{
929#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
930 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
931#else
932 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
933#endif
934}
935
936
937/**
938 * Used by TB code to store unsigned 16-bit data w/ flat address.
939 */
940IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
941{
942#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
943 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
944#else
945 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
946#endif
947}
948
949
950/**
951 * Used by TB code to store unsigned 32-bit data w/ flat address.
952 */
953IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
954{
955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
956 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
957#else
958 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
959#endif
960}
961
962
963/**
964 * Used by TB code to store unsigned 64-bit data w/ flat address.
965 */
966IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
967{
968#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
969 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
970#else
971 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
972#endif
973}
974
975
976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
977/**
978 * Used by TB code to store unsigned 128-bit data w/ flat address, SSE aligned.
979 */
980IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
981{
982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
983 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
984#else
985 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
986#endif
987}
988
989
990/**
991 * Used by TB code to store unsigned 128-bit data w/ flat address, no alignment check.
992 */
993IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
994{
995#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
996 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
997#else
998 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
999#endif
1000}
1001
1002
1003/**
1004 * Used by TB code to store unsigned 256-bit data w/ flat address, no alignment check.
1005 */
1006IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1007{
1008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1009 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1010#else
1011 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1012#endif
1013}
1014
1015
1016/**
1017 * Used by TB code to store unsigned 256-bit data w/ flat address, AVX aligned.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1022 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1023#else
1024 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1025#endif
1026}
1027#endif
1028
1029
1030
1031/**
1032 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1033 */
1034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1035{
1036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1037 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1038#else
1039 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1040#endif
1041}
1042
1043
1044/**
1045 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1050 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1051#else
1052 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to store a segment selector value onto a flat stack.
1059 *
1060 * Intel CPUs don't write the whole dword, hence this special function.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1065 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1066#else
1067 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1074 */
1075IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1076{
1077#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1078 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1079#else
1080 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1081#endif
1082}
1083
1084
1085/**
1086 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1087 */
1088IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1089{
1090#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1091 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1092#else
1093 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1094#endif
1095}
1096
1097
1098/**
1099 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1100 */
1101IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1102{
1103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1104 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1105#else
1106 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1107#endif
1108}
1109
1110
1111/**
1112 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1113 */
1114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1115{
1116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1117 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1118#else
1119 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1120#endif
1121}
1122
1123
1124
1125/*********************************************************************************************************************************
1126* Helpers: Segmented memory mapping. *
1127*********************************************************************************************************************************/
1128
1129/**
1130 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1131 * segmentation.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1134 RTGCPTR GCPtrMem, uint8_t iSegReg))
1135{
1136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1137 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1138#else
1139 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1140#endif
1141}
1142
1143
1144/**
1145 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1146 */
1147IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1148 RTGCPTR GCPtrMem, uint8_t iSegReg))
1149{
1150#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1151 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1152#else
1153 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1154#endif
1155}
1156
1157
1158/**
1159 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1160 */
1161IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1162 RTGCPTR GCPtrMem, uint8_t iSegReg))
1163{
1164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1165 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1166#else
1167 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1168#endif
1169}
1170
1171
1172/**
1173 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1174 */
1175IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1176 RTGCPTR GCPtrMem, uint8_t iSegReg))
1177{
1178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1179 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1180#else
1181 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1182#endif
1183}
1184
1185
1186/**
1187 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1188 * segmentation.
1189 */
1190IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1191 RTGCPTR GCPtrMem, uint8_t iSegReg))
1192{
1193#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1194 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1195#else
1196 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1197#endif
1198}
1199
1200
1201/**
1202 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1203 */
1204IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1205 RTGCPTR GCPtrMem, uint8_t iSegReg))
1206{
1207#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1208 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1209#else
1210 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1211#endif
1212}
1213
1214
1215/**
1216 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1217 */
1218IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1219 RTGCPTR GCPtrMem, uint8_t iSegReg))
1220{
1221#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1222 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1223#else
1224 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1225#endif
1226}
1227
1228
1229/**
1230 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1231 */
1232IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1233 RTGCPTR GCPtrMem, uint8_t iSegReg))
1234{
1235#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1236 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1237#else
1238 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1239#endif
1240}
1241
1242
1243/**
1244 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1245 * segmentation.
1246 */
1247IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1248 RTGCPTR GCPtrMem, uint8_t iSegReg))
1249{
1250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1251 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1252#else
1253 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1254#endif
1255}
1256
1257
1258/**
1259 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1260 */
1261IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1262 RTGCPTR GCPtrMem, uint8_t iSegReg))
1263{
1264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1265 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1266#else
1267 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1268#endif
1269}
1270
1271
1272/**
1273 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1274 */
1275IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1276 RTGCPTR GCPtrMem, uint8_t iSegReg))
1277{
1278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1279 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1280#else
1281 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1282#endif
1283}
1284
1285
1286/**
1287 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1288 */
1289IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1290 RTGCPTR GCPtrMem, uint8_t iSegReg))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1293 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1294#else
1295 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1296#endif
1297}
1298
1299
1300/**
1301 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1302 * segmentation.
1303 */
1304IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1305 RTGCPTR GCPtrMem, uint8_t iSegReg))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1308 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1309#else
1310 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1319 RTGCPTR GCPtrMem, uint8_t iSegReg))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1322 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1323#else
1324 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1331 */
1332IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1333 RTGCPTR GCPtrMem, uint8_t iSegReg))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1336 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1337#else
1338 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1347 RTGCPTR GCPtrMem, uint8_t iSegReg))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1350 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1351#else
1352 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1361 RTGCPTR GCPtrMem, uint8_t iSegReg))
1362{
1363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1364 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1365#else
1366 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1367#endif
1368}
1369
1370
1371/**
1372 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1373 */
1374IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1375 RTGCPTR GCPtrMem, uint8_t iSegReg))
1376{
1377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1378 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1379#else
1380 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1381#endif
1382}
1383
1384
1385/**
1386 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1387 * segmentation.
1388 */
1389IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1390 RTGCPTR GCPtrMem, uint8_t iSegReg))
1391{
1392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1393 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1394#else
1395 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1396#endif
1397}
1398
1399
1400/**
1401 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1402 */
1403IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1404 RTGCPTR GCPtrMem, uint8_t iSegReg))
1405{
1406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1407 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1408#else
1409 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#endif
1411}
1412
1413
1414/**
1415 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1416 */
1417IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1418 RTGCPTR GCPtrMem, uint8_t iSegReg))
1419{
1420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1421 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1422#else
1423 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#endif
1425}
1426
1427
1428/**
1429 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1430 */
1431IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1432 RTGCPTR GCPtrMem, uint8_t iSegReg))
1433{
1434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1435 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1436#else
1437 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#endif
1439}
1440
1441
1442/*********************************************************************************************************************************
1443* Helpers: Flat memory mapping. *
1444*********************************************************************************************************************************/
1445
1446/**
1447 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1448 * address.
1449 */
1450IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1451{
1452#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1453 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1454#else
1455 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1456#endif
1457}
1458
1459
1460/**
1461 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1467#else
1468 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1480#else
1481 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1490{
1491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1492 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1493#else
1494 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1495#endif
1496}
1497
1498
1499/**
1500 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1501 * address.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1504{
1505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1506 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1507#else
1508 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1509#endif
1510}
1511
1512
1513/**
1514 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1515 */
1516IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1517{
1518#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1519 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1520#else
1521 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1522#endif
1523}
1524
1525
1526/**
1527 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1528 */
1529IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1530{
1531#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1532 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1533#else
1534 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1535#endif
1536}
1537
1538
1539/**
1540 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1541 */
1542IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1543{
1544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1545 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1546#else
1547 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1548#endif
1549}
1550
1551
1552/**
1553 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1554 * address.
1555 */
1556IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1557{
1558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1559 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1560#else
1561 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1562#endif
1563}
1564
1565
1566/**
1567 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1568 */
1569IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1570{
1571#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1572 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1573#else
1574 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1575#endif
1576}
1577
1578
1579/**
1580 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1581 */
1582IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1583{
1584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1585 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1586#else
1587 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1588#endif
1589}
1590
1591
1592/**
1593 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1594 */
1595IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1596{
1597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1598 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1599#else
1600 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1607 * address.
1608 */
1609IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1610{
1611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1612 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1613#else
1614 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1615#endif
1616}
1617
1618
1619/**
1620 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1621 */
1622IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1623{
1624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1625 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1626#else
1627 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1628#endif
1629}
1630
1631
1632/**
1633 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1634 */
1635IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1636{
1637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1638 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1639#else
1640 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1641#endif
1642}
1643
1644
1645/**
1646 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1647 */
1648IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1649{
1650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1651 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1652#else
1653 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code to map 80-bit float data write-only w/ flat address.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1665#else
1666 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map 80-bit BCD data write-only w/ flat address.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1675{
1676#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1677 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1678#else
1679 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1680#endif
1681}
1682
1683
1684/**
1685 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1686 * address.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1689{
1690#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1691 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1692#else
1693 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1694#endif
1695}
1696
1697
1698/**
1699 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1704 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1705#else
1706 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1707#endif
1708}
1709
1710
1711/**
1712 * Used by TB code to map unsigned 128-bit data write-only w/ flat address.
1713 */
1714IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1715{
1716#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1717 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1718#else
1719 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1720#endif
1721}
1722
1723
1724/**
1725 * Used by TB code to map unsigned 128-bit data read-only w/ flat address.
1726 */
1727IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1728{
1729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1730 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1731#else
1732 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1733#endif
1734}
1735
1736
1737/*********************************************************************************************************************************
1738* Helpers: Commit, rollback & unmap *
1739*********************************************************************************************************************************/
1740
1741/**
1742 * Used by TB code to commit and unmap an atomic read-write memory mapping.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1745{
1746 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
1747}
1748
1749
1750/**
1751 * Used by TB code to commit and unmap a read-write memory mapping.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1754{
1755 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
1756}
1757
1758
1759/**
1760 * Used by TB code to commit and unmap a write-only memory mapping.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1763{
1764 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
1765}
1766
1767
1768/**
1769 * Used by TB code to commit and unmap a read-only memory mapping.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1772{
1773 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
1774}
1775
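/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * from the TB code's point of view the flat map helpers and the commit &
 * unmap helpers above are always used in pairs.  A recompiled
 * read-modify-write of a flat 32-bit location conceptually boils down to:
 *
 *      uint8_t   bUnmapInfo;
 *      uint32_t *pu32 = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32 += 1;                                             // operate on guest memory
 *      iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);     // commit and release the mapping
 *
 * The atomic, write-only and read-only map variants pair up the same way with
 * their respective commit & unmap helpers.
 */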
1776
1777/**
1778 * Reinitializes the native recompiler state.
1779 *
1780 * Called before starting a new recompile job.
1781 */
1782static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1783{
1784 pReNative->cLabels = 0;
1785 pReNative->bmLabelTypes = 0;
1786 pReNative->cFixups = 0;
1787#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1788 pReNative->pDbgInfo->cEntries = 0;
1789 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
1790#endif
1791 pReNative->pTbOrg = pTb;
1792 pReNative->cCondDepth = 0;
1793 pReNative->uCondSeqNo = 0;
1794 pReNative->uCheckIrqSeqNo = 0;
1795 pReNative->uTlbSeqNo = 0;
1796
1797#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1798 pReNative->Core.offPc = 0;
1799 pReNative->Core.cInstrPcUpdateSkipped = 0;
1800#endif
1801#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1802 pReNative->fSimdRaiseXcptChecksEmitted = 0;
1803#endif
1804 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1805#if IEMNATIVE_HST_GREG_COUNT < 32
1806 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1807#endif
1808 ;
1809 pReNative->Core.bmHstRegsWithGstShadow = 0;
1810 pReNative->Core.bmGstRegShadows = 0;
1811#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1812 pReNative->Core.bmGstRegShadowDirty = 0;
1813#endif
1814 pReNative->Core.bmVars = 0;
1815 pReNative->Core.bmStack = 0;
1816 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1817 pReNative->Core.u64ArgVars = UINT64_MAX;
1818
1819 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
1820 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1821 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1822 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1823 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1824 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1825 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1826 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
1827 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
1828 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
1829 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
1830 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
1831 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
1832 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
1833 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
1834 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
1835 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
1836 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
1837
1838 /* Full host register reinit: */
1839 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1840 {
1841 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1842 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1843 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1844 }
1845
1846 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1847 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1848#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1849 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1850#endif
1851#ifdef IEMNATIVE_REG_FIXED_TMP0
1852 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1853#endif
1854#ifdef IEMNATIVE_REG_FIXED_TMP1
1855 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
1856#endif
1857#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1858 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
1859#endif
1860 );
1861 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1862 {
1863 fRegs &= ~RT_BIT_32(idxReg);
1864 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1865 }
1866
1867 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1868#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1869 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1870#endif
1871#ifdef IEMNATIVE_REG_FIXED_TMP0
1872 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1873#endif
1874#ifdef IEMNATIVE_REG_FIXED_TMP1
1875 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
1876#endif
1877#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1878 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
1879#endif
1880
1881#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1882 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
1883# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
1884 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
1885# endif
1886 ;
1887 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
1888 pReNative->Core.bmGstSimdRegShadows = 0;
1889 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
1890 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
1891
1892 /* Full host register reinit: */
1893 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
1894 {
1895 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
1896 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
1897 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
1898 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
1899 }
1900
1901 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
1902 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1903 {
1904 fRegs &= ~RT_BIT_32(idxReg);
1905 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1906 }
1907
1908#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
1909 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1910#endif
1911
1912#endif
1913
1914 return pReNative;
1915}
1916
1917
1918/**
1919 * Allocates and initializes the native recompiler state.
1920 *
1921 * This is called the first time an EMT wants to recompile something.
1922 *
1923 * @returns Pointer to the new recompiler state.
1924 * @param pVCpu The cross context virtual CPU structure of the calling
1925 * thread.
1926 * @param pTb The TB that's about to be recompiled.
1927 * @thread EMT(pVCpu)
1928 */
1929static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1930{
1931 VMCPU_ASSERT_EMT(pVCpu);
1932
1933 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1934 AssertReturn(pReNative, NULL);
1935
1936 /*
1937 * Try to allocate all the buffers and structures we need.
1938 */
1939 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1940 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1941 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1942#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1943 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1944#endif
1945 if (RT_LIKELY( pReNative->pInstrBuf
1946 && pReNative->paLabels
1947 && pReNative->paFixups)
1948#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1949 && pReNative->pDbgInfo
1950#endif
1951 )
1952 {
1953 /*
1954 * Set the buffer & array sizes on success.
1955 */
1956 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1957 pReNative->cLabelsAlloc = _8K;
1958 pReNative->cFixupsAlloc = _16K;
1959#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1960 pReNative->cDbgInfoAlloc = _16K;
1961#endif
1962
1963 /* Other constant stuff: */
1964 pReNative->pVCpu = pVCpu;
1965
1966 /*
1967 * Done, just need to save it and reinit it.
1968 */
1969 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1970 return iemNativeReInit(pReNative, pTb);
1971 }
1972
1973 /*
1974 * Failed. Cleanup and return.
1975 */
1976 AssertFailed();
1977 RTMemFree(pReNative->pInstrBuf);
1978 RTMemFree(pReNative->paLabels);
1979 RTMemFree(pReNative->paFixups);
1980#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1981 RTMemFree(pReNative->pDbgInfo);
1982#endif
1983 RTMemFree(pReNative);
1984 return NULL;
1985}
1986
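/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * a typical caller reuses the per-EMT state and only allocates it on first
 * use, roughly along these lines (the actual caller lives elsewhere in this
 * file):
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (RT_LIKELY(pReNative))
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *          pReNative = iemNativeInit(pVCpu, pTb);  // allocates and then calls iemNativeReInit
 */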
1987
1988/**
1989 * Creates a label.
1990 *
1991 * If the label does not yet have a defined position,
1992 * call iemNativeLabelDefine() later to set it.
1993 *
1994 * @returns Label ID. Throws VBox status code on failure, so no need to check
1995 * the return value.
1996 * @param pReNative The native recompile state.
1997 * @param enmType The label type.
1998 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1999 * label is not yet defined (default).
2000 * @param uData Data associated with the label. Only applicable to
2001 * certain types of labels. Default is zero.
2002 */
2003DECL_HIDDEN_THROW(uint32_t)
2004iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2005 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2006{
2007 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2008
2009 /*
2010 * Locate existing label definition.
2011 *
2012 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2013 * and uData is zero.
2014 */
2015 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2016 uint32_t const cLabels = pReNative->cLabels;
2017 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2018#ifndef VBOX_STRICT
2019 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2020 && offWhere == UINT32_MAX
2021 && uData == 0
2022#endif
2023 )
2024 {
2025#ifndef VBOX_STRICT
2026 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2027 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2028 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2029 if (idxLabel < pReNative->cLabels)
2030 return idxLabel;
2031#else
2032 for (uint32_t i = 0; i < cLabels; i++)
2033 if ( paLabels[i].enmType == enmType
2034 && paLabels[i].uData == uData)
2035 {
2036 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2037 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2038 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2039 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2040 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2041 return i;
2042 }
2043 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2044 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2045#endif
2046 }
2047
2048 /*
2049 * Make sure we've got room for another label.
2050 */
2051 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2052 { /* likely */ }
2053 else
2054 {
2055 uint32_t cNew = pReNative->cLabelsAlloc;
2056 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2057 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2058 cNew *= 2;
2059 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* the IEMNATIVEFIXUP::idxLabel type restricts this */
2060 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2061 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2062 pReNative->paLabels = paLabels;
2063 pReNative->cLabelsAlloc = cNew;
2064 }
2065
2066 /*
2067 * Define a new label.
2068 */
2069 paLabels[cLabels].off = offWhere;
2070 paLabels[cLabels].enmType = enmType;
2071 paLabels[cLabels].uData = uData;
2072 pReNative->cLabels = cLabels + 1;
2073
2074 Assert((unsigned)enmType < 64);
2075 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2076
2077 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2078 {
2079 Assert(uData == 0);
2080 pReNative->aidxUniqueLabels[enmType] = cLabels;
2081 }
2082
2083 if (offWhere != UINT32_MAX)
2084 {
2085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2086 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2087 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2088#endif
2089 }
2090 return cLabels;
2091}
2092
2093
2094/**
2095 * Defines the location of an existing label.
2096 *
2097 * @param pReNative The native recompile state.
2098 * @param idxLabel The label to define.
2099 * @param offWhere The position.
2100 */
2101DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2102{
2103 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2104 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2105 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2106 pLabel->off = offWhere;
2107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2108 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2109 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2110#endif
2111}
2112
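/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * the usual pattern is to create a label up front, emit code that branches to
 * it (recording fixups, see iemNativeAddFixup below), and define its position
 * once it is known:
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);  // off stays UINT32_MAX for now
 *      ... emit instructions, adding fixups that reference idxLabel ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);                           // resolve the position
 *
 * Label types below kIemNativeLabelType_FirstWithMultipleInstances are unique
 * per TB and deduplicated via aidxUniqueLabels, so creating one twice simply
 * returns the existing index.
 */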
2113
2114/**
2115 * Looks up a label.
2116 *
2117 * @returns Label ID if found, UINT32_MAX if not.
2118 */
2119static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2120 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2121{
2122 Assert((unsigned)enmType < 64);
2123 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2124 {
2125 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2126 return pReNative->aidxUniqueLabels[enmType];
2127
2128 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2129 uint32_t const cLabels = pReNative->cLabels;
2130 for (uint32_t i = 0; i < cLabels; i++)
2131 if ( paLabels[i].enmType == enmType
2132 && paLabels[i].uData == uData
2133 && ( paLabels[i].off == offWhere
2134 || offWhere == UINT32_MAX
2135 || paLabels[i].off == UINT32_MAX))
2136 return i;
2137 }
2138 return UINT32_MAX;
2139}
2140
2141
2142/**
2143 * Adds a fixup.
2144 *
2145 * @throws VBox status code (int) on failure.
2146 * @param pReNative The native recompile state.
2147 * @param offWhere The instruction offset of the fixup location.
2148 * @param idxLabel The target label ID for the fixup.
2149 * @param enmType The fixup type.
2150 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2151 */
2152DECL_HIDDEN_THROW(void)
2153iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2154 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2155{
2156 Assert(idxLabel <= UINT16_MAX);
2157 Assert((unsigned)enmType <= UINT8_MAX);
2158#ifdef RT_ARCH_ARM64
2159 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2160 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2161 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2162#endif
2163
2164 /*
2165 * Make sure we've got room for another fixup.
2166 */
2167 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2168 uint32_t const cFixups = pReNative->cFixups;
2169 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2170 { /* likely */ }
2171 else
2172 {
2173 uint32_t cNew = pReNative->cFixupsAlloc;
2174 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2175 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2176 cNew *= 2;
2177 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2178 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2179 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2180 pReNative->paFixups = paFixups;
2181 pReNative->cFixupsAlloc = cNew;
2182 }
2183
2184 /*
2185 * Add the fixup.
2186 */
2187 paFixups[cFixups].off = offWhere;
2188 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2189 paFixups[cFixups].enmType = enmType;
2190 paFixups[cFixups].offAddend = offAddend;
2191 pReNative->cFixups = cFixups + 1;
2192}
2193
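/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * an emitter producing a forward branch records a fixup at the branch
 * instruction so the target can be patched once the label gets defined,
 * roughly:
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      ... emit the not-yet-resolved branch instruction at offset offBranch ...
 *      iemNativeAddFixup(pReNative, offBranch, idxLabel, enmFixupType);  // architecture specific fixup type
 *
 * A later pass walks paFixups and patches each recorded instruction with the
 * resolved label offset (plus offAddend where the fixup type uses it).
 */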
2194
2195/**
2196 * Slow code path for iemNativeInstrBufEnsure.
2197 */
2198DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2199{
2200 /* Double the buffer size till we meet the request. */
2201 uint32_t cNew = pReNative->cInstrBufAlloc;
2202 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2203 do
2204 cNew *= 2;
2205 while (cNew < off + cInstrReq);
2206
2207 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2208#ifdef RT_ARCH_ARM64
2209 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2210#else
2211 uint32_t const cbMaxInstrBuf = _2M;
2212#endif
2213 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2214
2215 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2216 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2217
2218#ifdef VBOX_STRICT
2219 pReNative->offInstrBufChecked = off + cInstrReq;
2220#endif
2221 pReNative->cInstrBufAlloc = cNew;
2222 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2223}
2224
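/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * emitters call the inline fast path iemNativeInstrBufEnsure() from the
 * recompiler header before writing instructions and only land in the slow
 * path above on overflow.  Conceptually:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);  // room for 2 instruction units
 *      pCodeBuf[off++] = ...;  // first native instruction unit
 *      pCodeBuf[off++] = ...;  // second one
 *
 * Doubling the allocation keeps the amortized slow path cost low, while
 * cbMaxInstrBuf bounds the worst-case TB code size.
 */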
2225#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2226
2227/**
2228 * Grows the static debug info array used during recompilation.
2229 *
2230 * @returns Pointer to the new debug info block; throws VBox status code on
2231 * failure, so no need to check the return value.
2232 */
2233DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2234{
2235 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2236 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2237 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2238 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2239 pReNative->pDbgInfo = pDbgInfo;
2240 pReNative->cDbgInfoAlloc = cNew;
2241 return pDbgInfo;
2242}
2243
2244
2245/**
2246 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2247 */
2248DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2249{
2250 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2251 { /* likely */ }
2252 else
2253 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2254 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2255}
2256
2257
2258/**
2259 * Debug Info: Adds a native offset record, if necessary.
2260 */
2261DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2262{
2263 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2264
2265 /*
2266 * Do we need this one?
2267 */
2268 uint32_t const offPrev = pDbgInfo->offNativeLast;
2269 if (offPrev == off)
2270 return;
2271 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2272
2273 /*
2274 * Add it.
2275 */
2276 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2277 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2278 pEntry->NativeOffset.offNative = off;
2279 pDbgInfo->offNativeLast = off;
2280}
2281
2282
2283/**
2284 * Debug Info: Record info about a label.
2285 */
2286static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2287{
2288 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2289 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2290 pEntry->Label.uUnused = 0;
2291 pEntry->Label.enmLabel = (uint8_t)enmType;
2292 pEntry->Label.uData = uData;
2293}
2294
2295
2296/**
2297 * Debug Info: Record info about a threaded call.
2298 */
2299static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2300{
2301 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2302 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2303 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2304 pEntry->ThreadedCall.uUnused = 0;
2305 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2306}
2307
2308
2309/**
2310 * Debug Info: Record info about a new guest instruction.
2311 */
2312static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2313{
2314 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2315 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2316 pEntry->GuestInstruction.uUnused = 0;
2317 pEntry->GuestInstruction.fExec = fExec;
2318}
2319
2320
2321/**
2322 * Debug Info: Record info about guest register shadowing.
2323 */
2324DECL_HIDDEN_THROW(void)
2325iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2326 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2327{
2328 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2329 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2330 pEntry->GuestRegShadowing.uUnused = 0;
2331 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2332 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2333 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2334#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2335 Assert( idxHstReg != UINT8_MAX
2336 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2337#endif
2338}
2339
2340
2341# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2342/**
2343 * Debug Info: Record info about guest SIMD register shadowing.
2344 */
2345DECL_HIDDEN_THROW(void)
2346iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2347 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2348{
2349 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2350 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2351 pEntry->GuestSimdRegShadowing.uUnused = 0;
2352 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2353 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2354 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2355}
2356# endif
2357
2358
2359# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2360/**
2361 * Debug Info: Record info about delayed RIP updates.
2362 */
2363DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2364{
2365 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2366 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2367 pEntry->DelayedPcUpdate.offPc = offPc;
2368 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2369}
2370# endif
2371
2372# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2373
2374/**
2375 * Debug Info: Record info about a dirty guest register.
2376 */
2377DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2378 uint8_t idxGstReg, uint8_t idxHstReg)
2379{
2380 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2381 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2382 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2383 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2384 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2385}
2386
2387
2388/**
2389 * Debug Info: Record info about a dirty guest register writeback operation.
2390 */
2391DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2392{
2393 unsigned const cBitsGstRegMask = 25;
2394 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2395
2396 /* The first block of 25 bits: */
2397 if (fGstReg & fGstRegMask)
2398 {
2399 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2400 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2401 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2402 pEntry->GuestRegWriteback.cShift = 0;
2403 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2404 fGstReg &= ~(uint64_t)fGstRegMask;
2405 if (!fGstReg)
2406 return;
2407 }
2408
2409 /* The second block of 25 bits: */
2410 fGstReg >>= cBitsGstRegMask;
2411 if (fGstReg & fGstRegMask)
2412 {
2413 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2414 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2415 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2416 pEntry->GuestRegWriteback.cShift = 1;
2417 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2418 fGstReg &= ~(uint64_t)fGstRegMask;
2419 if (!fGstReg)
2420 return;
2421 }
2422
2423 /* The last block with 14 bits: */
2424 fGstReg >>= cBitsGstRegMask;
2425 Assert(fGstReg & fGstRegMask);
2426 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2427 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2428 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2429 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2430 pEntry->GuestRegWriteback.cShift = 2;
2431 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2432}
2433
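/*
 * Worked example (editorial addition, not part of the original source): the
 * 64-bit fGstReg mask is recorded as up to three entries of 25+25+14 bits,
 * with cShift giving the 25-bit chunk index:
 *
 *      fGstReg = 0x0000000200000001 (bits 0 and 33 set)
 *          -> first entry:  cShift=0, fGstReg=0x0000001  (bit 0)
 *          -> second entry: cShift=1, fGstReg=0x0000100  (bit 33 - 25 = bit 8 of chunk 1)
 *          -> no third entry, since bits 50..63 are all clear
 */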
2434# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2435
2436#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2437
2438
2439/*********************************************************************************************************************************
2440* Register Allocator *
2441*********************************************************************************************************************************/
2442
2443/**
2444 * Register parameter indexes (indexed by argument number).
2445 */
2446DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2447{
2448 IEMNATIVE_CALL_ARG0_GREG,
2449 IEMNATIVE_CALL_ARG1_GREG,
2450 IEMNATIVE_CALL_ARG2_GREG,
2451 IEMNATIVE_CALL_ARG3_GREG,
2452#if defined(IEMNATIVE_CALL_ARG4_GREG)
2453 IEMNATIVE_CALL_ARG4_GREG,
2454# if defined(IEMNATIVE_CALL_ARG5_GREG)
2455 IEMNATIVE_CALL_ARG5_GREG,
2456# if defined(IEMNATIVE_CALL_ARG6_GREG)
2457 IEMNATIVE_CALL_ARG6_GREG,
2458# if defined(IEMNATIVE_CALL_ARG7_GREG)
2459 IEMNATIVE_CALL_ARG7_GREG,
2460# endif
2461# endif
2462# endif
2463#endif
2464};
2465AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2466
2467/**
2468 * Call register masks indexed by argument count.
2469 */
2470DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2471{
2472 0,
2473 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2474 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2475 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2476 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2477 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2478#if defined(IEMNATIVE_CALL_ARG4_GREG)
2479 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2480 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2481# if defined(IEMNATIVE_CALL_ARG5_GREG)
2482 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2483 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2484# if defined(IEMNATIVE_CALL_ARG6_GREG)
2485 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2486 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2487 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2488# if defined(IEMNATIVE_CALL_ARG7_GREG)
2489 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2490 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2491 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2492# endif
2493# endif
2494# endif
2495#endif
2496};
2497
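/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * g_aidxIemNativeCallRegs maps an argument number to its host register, while
 * g_afIemNativeCallRegs gives the combined register mask for the first N
 * arguments, e.g. when freeing up argument registers ahead of a helper call:
 *
 *      uint8_t  const idxRegArg1 = g_aidxIemNativeCallRegs[1];    // host register carrying argument #1
 *      uint32_t const fArgRegs   = g_afIemNativeCallRegs[cArgs];  // all registers used by cArgs arguments
 *      ... move or spill whatever currently occupies the registers in fArgRegs ...
 */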
2498#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2499/**
2500 * BP offset of the stack argument slots.
2501 *
2502 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2503 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2504 */
2505DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2506{
2507 IEMNATIVE_FP_OFF_STACK_ARG0,
2508# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2509 IEMNATIVE_FP_OFF_STACK_ARG1,
2510# endif
2511# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2512 IEMNATIVE_FP_OFF_STACK_ARG2,
2513# endif
2514# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2515 IEMNATIVE_FP_OFF_STACK_ARG3,
2516# endif
2517};
2518AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2519#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2520
2521/**
2522 * Info about shadowed guest register values.
2523 * @see IEMNATIVEGSTREG
2524 */
2525DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2526{
2527#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2528 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2529 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2530 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2531 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2532 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2533 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2534 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2535 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2536 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2537 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2538 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2539 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2540 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2541 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2542 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2543 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2544 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2545 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2546 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2547 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2548 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2549 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2550 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2551 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2552 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2553 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2554 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2555 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2556 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2557 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2558 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2559 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2560 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2561 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2562 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2563 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2564 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2565 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2566 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2567 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2568 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2569 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2570 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2571 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2572 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2573 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2574 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2575 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2576#undef CPUMCTX_OFF_AND_SIZE
2577};
2578AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2579
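/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * the table above lets generic emitter code turn a guest register enum value
 * into a VMCPU offset and access size, e.g. as
 * iemNativeEmitStoreGprWithGstShadowReg() further down does:
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;     // offset of cpum.GstCtx.rip
 *      uint8_t  const cbReg   = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;      // 8 for the 64-bit rip field
 *      Log(("%s\n", g_aGstShadowInfo[kIemNativeGstReg_Pc].pszName));           // "rip"
 */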
2580
2581/** Host CPU general purpose register names. */
2582DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2583{
2584#ifdef RT_ARCH_AMD64
2585 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2586#elif defined(RT_ARCH_ARM64)
2587 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2588 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2589#else
2590# error "port me"
2591#endif
2592};
2593
2594
2595#if 0 /* unused */
2596/**
2597 * Tries to locate a suitable register in the given register mask.
2598 *
2599 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2600 * failed.
2601 *
2602 * @returns Host register number on success, returns UINT8_MAX on failure.
2603 */
2604static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2605{
2606 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2607 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2608 if (fRegs)
2609 {
2610 /** @todo pick better here: */
2611 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2612
2613 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2614 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2615 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2616 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2617
2618 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2619 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2620 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2621 return idxReg;
2622 }
2623 return UINT8_MAX;
2624}
2625#endif /* unused */
2626
2627
2628#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2629/**
2630 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2631 *
2632 * @returns New code buffer offset; throws VBox status code on failure.
2633 * @param pReNative The native recompile state.
2634 * @param off The current code buffer position.
2635 * @param enmGstReg The guest register to store to.
2636 * @param idxHstReg The host register to store from.
2637 */
2638DECL_FORCE_INLINE_THROW(uint32_t)
2639iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2640{
2641 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2642 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2643
2644 switch (g_aGstShadowInfo[enmGstReg].cb)
2645 {
2646 case sizeof(uint64_t):
2647 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2648 case sizeof(uint32_t):
2649 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2650 case sizeof(uint16_t):
2651 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2652#if 0 /* not present in the table. */
2653 case sizeof(uint8_t):
2654 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2655#endif
2656 default:
2657 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2658 }
2659}
2660
2661
2662/**
2663 * Emits code to flush a pending write of the given guest register if any.
2664 *
2665 * @returns New code buffer offset.
2666 * @param pReNative The native recompile state.
2667 * @param off Current code buffer position.
2668 * @param enmGstReg The guest register to flush.
2669 */
2670DECL_HIDDEN_THROW(uint32_t)
2671iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2672{
2673 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2674
2675 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2676 && enmGstReg <= kIemNativeGstReg_GprLast)
2677 || enmGstReg == kIemNativeGstReg_MxCsr);
2678 Assert( idxHstReg != UINT8_MAX
2679 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2680 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2681 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2682
2683 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2684
2685 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2686 return off;
2687}
2688
2689
2690/**
2691 * Flush the given set of guest registers if marked as dirty.
2692 *
2693 * @returns New code buffer offset.
2694 * @param pReNative The native recompile state.
2695 * @param off Current code buffer position.
2696 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2697 */
2698DECL_HIDDEN_THROW(uint32_t)
2699iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2700{
2701 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2702 if (bmGstRegShadowDirty)
2703 {
2704# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2705 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2706 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2707# endif
2708 do
2709 {
2710 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2711 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2712 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2713 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2714 } while (bmGstRegShadowDirty);
2715 }
2716
2717 return off;
2718}
2719
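/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * callers about to clobber or reuse shadow copies typically flush only the
 * affected subset, and fall back to the flush-everything default before calls
 * with side effects:
 *
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off,
 *                                        RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX)); // just RAX
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off);                                      // everything dirty
 */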
2720
2721/**
2722 * Flush all shadowed guest registers marked as dirty for the given host register.
2723 *
2724 * @returns New code buffer offset.
2725 * @param pReNative The native recompile state.
2726 * @param off Current code buffer position.
2727 * @param idxHstReg The host register.
2728 *
2729 * @note This doesn't do any unshadowing of guest registers from the host register.
2730 */
2731DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
2732{
2733 /* We need to flush any pending guest register writes this host register shadows. */
2734 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2735 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
2736 {
2737# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2738 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2739 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
2740# endif
2741 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
2742 * likely to only have a single bit set. It'll be in the 0..15 range,
2743 * but still it's 15 unnecessary loops for the last guest register. */
2744
2745 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
2746 do
2747 {
2748 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2749 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2750 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2751 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2752 } while (bmGstRegShadowDirty);
2753 }
2754
2755 return off;
2756}
2757#endif
2758
2759
2760/**
2761 * Locate a register, possibly freeing one up.
2762 *
2763 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2764 * failed.
2765 *
2766 * @returns Host register number on success. Returns UINT8_MAX if no registers
2767 * found, in which case the caller is expected to deal with it and raise an
2768 * allocation-type specific status code (if desired).
2769 *
2770 * @throws VBox status code if we run into trouble spilling a variable or
2771 * recording debug info. Does NOT throw anything if we're out of
2772 * registers, though.
2773 */
2774static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2775 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2776{
2777 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
2778 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2779 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2780
2781 /*
2782 * Try a freed register that's shadowing a guest register.
2783 */
2784 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2785 if (fRegs)
2786 {
2787 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
2788
2789#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
2790 /*
2791 * When we have liveness information, we use it to kick out all shadowed
2792 * guest registers that will not be needed any more in this TB. If we're
2793 * lucky, this may prevent us from ending up here again.
2794 *
2795 * Note! We must consider the previous entry here so we don't free
2796 * anything that the current threaded function requires (current
2797 * entry is produced by the next threaded function).
2798 */
2799 uint32_t const idxCurCall = pReNative->idxCurCall;
2800 if (idxCurCall > 0)
2801 {
2802 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
2803
2804# ifndef IEMLIVENESS_EXTENDED_LAYOUT
2805 /* Construct a mask of the guest registers in the UNUSED or XCPT_OR_CALL state. */
2806 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
2807 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or the XCPT_OR_CALL state */
2808#else
2809 /* Construct a mask of the registers not in the read or write state.
2810 Note! We could skip writes, if they aren't from us, as this is just
2811 a hack to prevent trashing registers that have just been written
2812 or will be written when we retire the current instruction. */
2813 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
2814 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
2815 & IEMLIVENESSBIT_MASK;
2816#endif
2817 /* Merge EFLAGS. */
2818 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
2819 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
2820 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
2821 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
2822 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
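 /* Editorial note (not in the original source): the three shift/AND steps
 AND-reduce the seven consecutive per-flag liveness bits starting at the
 kIemNativeGstReg_EFlags position down into that single bit, so EFLAGS is
 only considered freeable when every tracked flag is unused; the two lines
 above then drop the individual flag bits and keep just the merged one. */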
2823
2824 /* If it matches any shadowed registers. */
2825 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
2826 {
2827#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2828 /* Writeback any dirty shadow registers we are about to unshadow. */
2829 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
2830#endif
2831
2832 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
2833 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
2834 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
2835
2836 /* See if we've got any unshadowed registers we can return now. */
2837 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
2838 if (fUnshadowedRegs)
2839 {
2840 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
2841 return (fPreferVolatile
2842 ? ASMBitFirstSetU32(fUnshadowedRegs)
2843 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2844 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
2845 - 1;
2846 }
2847 }
2848 }
2849#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
2850
2851 unsigned const idxReg = (fPreferVolatile
2852 ? ASMBitFirstSetU32(fRegs)
2853 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2854 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
2855 - 1;
2856
2857 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2858 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2859 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2860 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2861
2862#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2863 /* We need to flush any pending guest register writes this host register shadows. */
2864 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
2865#endif
2866
2867 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2868 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2869 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2870 return idxReg;
2871 }
2872
2873 /*
2874 * Try free up a variable that's in a register.
2875 *
2876 * We do two rounds here, first evacuating variables we don't need to be
2877 * saved on the stack, then in the second round move things to the stack.
2878 */
2879 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
2880 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2881 {
2882 uint32_t fVars = pReNative->Core.bmVars;
2883 while (fVars)
2884 {
2885 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2886 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2887#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2888 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
2889 continue;
2890#endif
2891
2892 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2893 && (RT_BIT_32(idxReg) & fRegMask)
2894 && ( iLoop == 0
2895 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2896 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2897 && !pReNative->Core.aVars[idxVar].fRegAcquired)
2898 {
2899 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2900 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2901 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2902 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2903 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2904 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2905#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2906 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2907#endif
2908
2909 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2910 {
2911 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
2912 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2913 }
2914
2915 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2916 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2917
2918 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2919 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2920 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2921 return idxReg;
2922 }
2923 fVars &= ~RT_BIT_32(idxVar);
2924 }
2925 }
2926
2927 return UINT8_MAX;
2928}
2929
2930
2931/**
2932 * Reassigns a variable to a different register specified by the caller.
2933 *
2934 * @returns The new code buffer position.
2935 * @param pReNative The native recompile state.
2936 * @param off The current code buffer position.
2937 * @param idxVar The variable index.
2938 * @param idxRegOld The old host register number.
2939 * @param idxRegNew The new host register number.
2940 * @param pszCaller The caller for logging.
2941 */
2942static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2943 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2944{
2945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2946 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
2947#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2948 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
2949#endif
2950 RT_NOREF(pszCaller);
2951
2952#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2953 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2954#endif
2955 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2956
2957 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2958#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2959 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2960#endif
2961 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
2962 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2964
2965 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2966 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2967 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2968 if (fGstRegShadows)
2969 {
2970 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
2971 | RT_BIT_32(idxRegNew);
2972 while (fGstRegShadows)
2973 {
2974 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2975 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2976
2977 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2978 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2979 }
2980 }
2981
2982 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
2983 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2984 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2985 return off;
2986}
2987
2988
2989/**
2990 * Moves a variable to a different register or spills it onto the stack.
2991 *
2992 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2993 * kinds can easily be recreated if needed later.
2994 *
2995 * @returns The new code buffer position.
2996 * @param pReNative The native recompile state.
2997 * @param off The current code buffer position.
2998 * @param idxVar The variable index.
2999 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3000 * call-volatile registers.
3001 */
3002DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3003 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3004{
3005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3006 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3007 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3008 Assert(!pVar->fRegAcquired);
3009
3010 uint8_t const idxRegOld = pVar->idxReg;
3011 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3012 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3013 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3014 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3015 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3016 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3017 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3018 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3019#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3020 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3021#endif
3022
3023
3024 /** @todo Add statistics on this.*/
3025 /** @todo Implement basic variable liveness analysis (python) so variables
3026 * can be freed immediately once no longer used. Otherwise we may be
3027 * trashing registers and stack space for dead variables.
3028 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3029
3030 /*
3031 * First try move it to a different register, as that's cheaper.
3032 */
3033 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3034 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3035 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3036 if (fRegs)
3037 {
3038 /* Avoid using shadow registers, if possible. */
3039 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3040 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3041 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3042 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3043 }
3044
3045 /*
3046 * Otherwise we must spill the register onto the stack.
3047 */
3048 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3049 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3050 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3051 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3052
3053 pVar->idxReg = UINT8_MAX;
3054 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3055 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3056 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3057 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3058 return off;
3059}
3060
3061
3062/**
3063 * Allocates a temporary host general purpose register.
3064 *
3065 * This may emit code to save register content onto the stack in order to free
3066 * up a register.
3067 *
3068 * @returns The host register number; throws VBox status code on failure,
3069 * so no need to check the return value.
3070 * @param pReNative The native recompile state.
3071 * @param poff Pointer to the variable with the code buffer position.
3072 * This will be updated if we need to move a variable from
3073 * register to stack in order to satisfy the request.
3074 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3075 * registers (@c true, default) or the other way around
3076 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3077 */
3078DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3079{
3080 /*
3081 * Try find a completely unused register, preferably a call-volatile one.
3082 */
3083 uint8_t idxReg;
3084 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3085 & ~pReNative->Core.bmHstRegsWithGstShadow
3086 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3087 if (fRegs)
3088 {
3089 if (fPreferVolatile)
3090 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3091 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3092 else
3093 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3094 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3095 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3096 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3097 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3098 }
3099 else
3100 {
3101 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3102 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3103 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3104 }
3105 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3106}
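
/*
 * Illustrative usage sketch, not part of the original code: the usual pattern
 * is to pair iemNativeRegAllocTmp() with iemNativeRegFreeTmp() around the
 * instructions that need the scratch register, e.g.
 *
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, 0);
 *     ... emit whatever else needs the scratch register ...
 *     iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */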
3107
3108
3109/**
3110 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3111 * registers.
3112 *
3113 * @returns The host register number; throws VBox status code on failure,
3114 * so no need to check the return value.
3115 * @param pReNative The native recompile state.
3116 * @param poff Pointer to the variable with the code buffer position.
3117 * This will be updated if we need to move a variable from
3118 * register to stack in order to satisfy the request.
3119 * @param fRegMask Mask of acceptable registers.
3120 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3121 * registers (@c true, default) or the other way around
3122 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3123 */
3124DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3125 bool fPreferVolatile /*= true*/)
3126{
3127 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3128 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3129
3130 /*
3131 * Try find a completely unused register, preferably a call-volatile one.
3132 */
3133 uint8_t idxReg;
3134 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3135 & ~pReNative->Core.bmHstRegsWithGstShadow
3136 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3137 & fRegMask;
3138 if (fRegs)
3139 {
3140 if (fPreferVolatile)
3141 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3142 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3143 else
3144 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3145 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3146 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3147 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3148 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3149 }
3150 else
3151 {
3152 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3153 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3154 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3155 }
3156 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3157}
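
/*
 * Illustrative usage sketch, not part of the original code (idxSafeReg is a
 * made-up name): restricting the allocation to call-preserved registers, e.g.
 * for a value that must survive a helper call emitted further down:
 *
 *     uint8_t const idxSafeReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                       IEMNATIVE_HST_GREG_MASK
 *                                                       & ~IEMNATIVE_REG_FIXED_MASK
 *                                                       & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
 *                                                       false);
 *     ...
 *     iemNativeRegFreeTmp(pReNative, idxSafeReg);
 */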
3158
3159
3160/**
3161 * Allocates a temporary register for loading an immediate value into.
3162 *
3163 * This will emit code to load the immediate, unless there happens to be an
3164 * unused register with the value already loaded.
3165 *
3166 * The caller must not modify the returned register; it must be considered
3167 * read-only. Free using iemNativeRegFreeTmpImm.
3168 *
3169 * @returns The host register number; throws VBox status code on failure, so no
3170 * need to check the return value.
3171 * @param pReNative The native recompile state.
3172 * @param poff Pointer to the variable with the code buffer position.
3173 * @param uImm The immediate value that the register must hold upon
3174 * return.
3175 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3176 * registers (@c true, default) or the other way around
3177 * (@c false).
3178 *
3179 * @note Reusing immediate values has not been implemented yet.
3180 */
3181DECL_HIDDEN_THROW(uint8_t)
3182iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3183{
3184 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3185 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3186 return idxReg;
3187}
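
/*
 * Illustrative usage sketch, not part of the original code (idxRegLimit is a
 * made-up name): the register is read-only to the caller and is released via
 * the matching iemNativeRegFreeTmpImm() call:
 *
 *     uint8_t const idxRegLimit = iemNativeRegAllocTmpImm(pReNative, &off, UINT32_C(0xffff));
 *     ... emit a compare or similar that only reads idxRegLimit ...
 *     iemNativeRegFreeTmpImm(pReNative, idxRegLimit);
 */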
3188
3189
3190/**
3191 * Allocates a temporary host general purpose register for keeping a guest
3192 * register value.
3193 *
3194 * Since we may already have a register holding the guest register value,
3195 * code will be emitted to do the loading if that's not the case. Code may also
3196 * be emitted if we have to free up a register to satisfy the request.
3197 *
3198 * @returns The host register number; throws VBox status code on failure, so no
3199 * need to check the return value.
3200 * @param pReNative The native recompile state.
3201 * @param poff Pointer to the variable with the code buffer
3202 * position. This will be updated if we need to move a
3203 * variable from register to stack in order to satisfy
3204 * the request.
3205 * @param enmGstReg The guest register that is to be updated.
3206 * @param enmIntendedUse How the caller will be using the host register.
3207 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3208 * register is okay (default). The ASSUMPTION here is
3209 * that the caller has already flushed all volatile
3210 * registers, so this is only applied if we allocate a
3211 * new register.
3212 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3213 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3214 */
3215DECL_HIDDEN_THROW(uint8_t)
3216iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3217 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3218 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3219{
3220 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3221#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3222 AssertMsg( fSkipLivenessAssert
3223 || pReNative->idxCurCall == 0
3224 || enmGstReg == kIemNativeGstReg_Pc
3225 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3226 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3227 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3228 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3229 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3230 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3231#endif
3232 RT_NOREF(fSkipLivenessAssert);
3233#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3234 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3235#endif
3236 uint32_t const fRegMask = !fNoVolatileRegs
3237 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3238 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3239
3240 /*
3241 * First check if the guest register value is already in a host register.
3242 */
3243 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3244 {
3245 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3246 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3247 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3248 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3249
3250 /* It's not supposed to be allocated... */
3251 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3252 {
3253 /*
3254 * If the register will trash the guest shadow copy, try find a
3255 * completely unused register we can use instead. If that fails,
3256 * we need to disassociate the host reg from the guest reg.
3257 */
3258 /** @todo would be nice to know if preserving the register is in any way helpful. */
3259 /* If the purpose is calculations, try duplicate the register value as
3260 we'll be clobbering the shadow. */
3261 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3262 && ( ~pReNative->Core.bmHstRegs
3263 & ~pReNative->Core.bmHstRegsWithGstShadow
3264 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3265 {
3266 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3267
3268 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3269
3270 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3271 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3272 g_apszIemNativeHstRegNames[idxRegNew]));
3273 idxReg = idxRegNew;
3274 }
3275 /* If the current register matches the restrictions, go ahead and allocate
3276 it for the caller. */
3277 else if (fRegMask & RT_BIT_32(idxReg))
3278 {
3279 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3280 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3281 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3282 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3283 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3284 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3285 else
3286 {
3287 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3288 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3289 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3290 }
3291 }
3292 /* Otherwise, allocate a register that satisfies the caller and transfer
3293 the shadowing if compatible with the intended use. (This basically
3294 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3295 else
3296 {
3297 Assert(fNoVolatileRegs);
3298 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3299 !fNoVolatileRegs
3300 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3301 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3302 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3303 {
3304 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3305 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3306 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3307 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3308 }
3309 else
3310 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3311 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3312 g_apszIemNativeHstRegNames[idxRegNew]));
3313 idxReg = idxRegNew;
3314 }
3315 }
3316 else
3317 {
3318 /*
3319 * Oops. Shadowed guest register already allocated!
3320 *
3321 * Allocate a new register, copy the value and, if updating, the
3322 * guest shadow copy assignment to the new register.
3323 */
3324 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3325 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3326 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3327 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3328
3329 /** @todo share register for readonly access. */
3330 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3331 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3332
3333 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3334 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3335
3336 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3337 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3338 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3339 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3340 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3341 else
3342 {
3343 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3344 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3345 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3346 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3347 }
3348 idxReg = idxRegNew;
3349 }
3350 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3351
3352#ifdef VBOX_STRICT
3353 /* Strict builds: Check that the value is correct. */
3354 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3355#endif
3356
3357#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3358 /** @todo r=aeichner Implement for registers other than GPR as well. */
3359 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3360 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3361 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3362 && enmGstReg <= kIemNativeGstReg_GprLast)
3363 || enmGstReg == kIemNativeGstReg_MxCsr))
3364 {
3365# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3366 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3367 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3368# endif
3369 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3370 }
3371#endif
3372
3373 return idxReg;
3374 }
3375
3376 /*
3377 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3378 */
3379 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3380
3381 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3382 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3383
3384 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3385 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3386 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3387 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3388
3389#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3390 /** @todo r=aeichner Implement for registers other than GPR as well. */
3391 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3392 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3393 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3394 && enmGstReg <= kIemNativeGstReg_GprLast)
3395 || enmGstReg == kIemNativeGstReg_MxCsr))
3396 {
3397# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3398 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3399 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3400# endif
3401 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3402 }
3403#endif
3404
3405 return idxRegNew;
3406}
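
/*
 * Illustrative usage sketch, not part of the original code (enmGstReg stands
 * for whichever guest register the caller is working on): fetching the guest
 * value for a read-modify-write operation and releasing the host register
 * again once the update has been emitted:
 *
 *     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, enmGstReg,
 *                                                                  kIemNativeGstRegUse_ForUpdate);
 *     ... emit the update on idxGstTmpReg and, unless delayed writeback takes
 *         care of it, the store back to CPUMCTX ...
 *     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
 */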
3407
3408
3409/**
3410 * Allocates a temporary host general purpose register that already holds the
3411 * given guest register value.
3412 *
3413 * The use case for this function is places where the shadowing state cannot be
3414 * modified due to branching and such. This will fail if we don't have a
3415 * current shadow copy handy or if it's incompatible. The only code that will
3416 * be emitted here is value checking code in strict builds.
3417 *
3418 * The intended use can only be readonly!
3419 *
3420 * @returns The host register number, UINT8_MAX if not present.
3421 * @param pReNative The native recompile state.
3422 * @param poff Pointer to the instruction buffer offset.
3423 * Will be updated in strict builds if a register is
3424 * found.
3425 * @param enmGstReg The guest register that is to be updated.
3426 * @note In strict builds, this may throw instruction buffer growth failures.
3427 * Non-strict builds will not throw anything.
3428 * @sa iemNativeRegAllocTmpForGuestReg
3429 */
3430DECL_HIDDEN_THROW(uint8_t)
3431iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3432{
3433 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3434#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3435 AssertMsg( pReNative->idxCurCall == 0
3436 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3437 || enmGstReg == kIemNativeGstReg_Pc,
3438 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3439#endif
3440
3441 /*
3442 * First check if the guest register value is already in a host register.
3443 */
3444 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3445 {
3446 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3447 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3448 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3449 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3450
3451 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3452 {
3453 /*
3454 * We only do readonly use here, so easy compared to the other
3455 * variant of this code.
3456 */
3457 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3458 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3459 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3460 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3461 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3462
3463#ifdef VBOX_STRICT
3464 /* Strict builds: Check that the value is correct. */
3465 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3466#else
3467 RT_NOREF(poff);
3468#endif
3469 return idxReg;
3470 }
3471 }
3472
3473 return UINT8_MAX;
3474}
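
/*
 * Illustrative usage sketch, not part of the original code (enmGstReg again
 * standing for the caller's guest register): since this variant never emits a
 * load, the caller has to handle the UINT8_MAX "no shadow copy" answer and
 * fall back to something else:
 *
 *     uint8_t const idxRegShadow = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, enmGstReg);
 *     if (idxRegShadow != UINT8_MAX)
 *     {
 *         ... use the shadow copy read-only ...
 *         iemNativeRegFreeTmp(pReNative, idxRegShadow);
 *     }
 *     else
 *         ... take a path that does not rely on a host copy ...
 */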
3475
3476
3477/**
3478 * Allocates argument registers for a function call.
3479 *
3480 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3481 * need to check the return value.
3482 * @param pReNative The native recompile state.
3483 * @param off The current code buffer offset.
3484 * @param cArgs The number of arguments the function call takes.
3485 */
3486DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3487{
3488 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3489 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3490 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3491 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3492
3493 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3494 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3495 else if (cArgs == 0)
3496 return off;
3497
3498 /*
3499 * Do we get lucky and all registers are free and not shadowing anything?
3500 */
3501 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3502 for (uint32_t i = 0; i < cArgs; i++)
3503 {
3504 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3505 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3506 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3507 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3508 }
3509 /*
3510 * Okay, not lucky so we have to free up the registers.
3511 */
3512 else
3513 for (uint32_t i = 0; i < cArgs; i++)
3514 {
3515 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3516 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3517 {
3518 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3519 {
3520 case kIemNativeWhat_Var:
3521 {
3522 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3523 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3524 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3525 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3526 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3527#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3528 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3529#endif
3530
3531 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3532 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3533 else
3534 {
3535 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3536 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3537 }
3538 break;
3539 }
3540
3541 case kIemNativeWhat_Tmp:
3542 case kIemNativeWhat_Arg:
3543 case kIemNativeWhat_rc:
3544 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3545 default:
3546 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3547 }
3548
3549 }
3550 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3551 {
3552 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3553 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3554 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3555#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3556 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3557#endif
3558 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3559 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3560 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3561 }
3562 else
3563 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3564 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3565 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3566 }
3567 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3568 return off;
3569}
3570
3571
3572DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3573
3574
3575#if 0
3576/**
3577 * Frees a register assignment of any type.
3578 *
3579 * @param pReNative The native recompile state.
3580 * @param idxHstReg The register to free.
3581 *
3582 * @note Does not update variables.
3583 */
3584DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3585{
3586 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3587 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3588 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3589 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3590 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3591 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3592 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3593 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3594 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3595 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3596 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3597 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3598 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3599 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3600
3601 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3602 /* no flushing, right:
3603 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3604 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3605 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3606 */
3607}
3608#endif
3609
3610
3611/**
3612 * Frees a temporary register.
3613 *
3614 * Any shadow copies of guest registers assigned to the host register will not
3615 * be flushed by this operation.
3616 */
3617DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3618{
3619 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3620 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3621 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3622 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3623 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3624}
3625
3626
3627/**
3628 * Frees a temporary immediate register.
3629 *
3630 * It is assumed that the caller has not modified the register, so it still holds
3631 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3632 */
3633DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3634{
3635 iemNativeRegFreeTmp(pReNative, idxHstReg);
3636}
3637
3638
3639/**
3640 * Frees a register assigned to a variable.
3641 *
3642 * The register will be disassociated from the variable.
3643 */
3644DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3645{
3646 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3647 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3648 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3650 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3651#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3652 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3653#endif
3654
3655 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3656 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3657 if (!fFlushShadows)
3658 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3659 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3660 else
3661 {
3662 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3663 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3664#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3665 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3666#endif
3667 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3668 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3669 uint64_t fGstRegShadows = fGstRegShadowsOld;
3670 while (fGstRegShadows)
3671 {
3672 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3673 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3674
3675 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3676 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3677 }
3678 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3679 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3680 }
3681}
3682
3683
3684#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3685# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
3686/** Host CPU SIMD register names. */
3687DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3688{
3689# ifdef RT_ARCH_AMD64
3690 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3691# elif defined(RT_ARCH_ARM64)
3692 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3693 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3694# else
3695# error "port me"
3696# endif
3697};
3698# endif
3699
3700
3701/**
3702 * Frees a SIMD register assigned to a variable.
3703 *
3704 * The register will be disassociated from the variable.
3705 */
3706DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3707{
3708 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3709 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3710 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3711 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3712 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3713 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3714
3715 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3716 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3717 if (!fFlushShadows)
3718 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3719 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3720 else
3721 {
3722 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3723 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3724 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
3725 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
3726 uint64_t fGstRegShadows = fGstRegShadowsOld;
3727 while (fGstRegShadows)
3728 {
3729 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3730 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3731
3732 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
3733 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
3734 }
3735 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3736 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3737 }
3738}
3739
3740
3741/**
3742 * Reassigns a variable to a different SIMD register specified by the caller.
3743 *
3744 * @returns The new code buffer position.
3745 * @param pReNative The native recompile state.
3746 * @param off The current code buffer position.
3747 * @param idxVar The variable index.
3748 * @param idxRegOld The old host register number.
3749 * @param idxRegNew The new host register number.
3750 * @param pszCaller The caller for logging.
3751 */
3752static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3753 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3754{
3755 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3756 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3757 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3758 RT_NOREF(pszCaller);
3759
3760 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3761 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
3762 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
3763
3764 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3765 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3766 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3767
3768 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3769 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
3771
3772 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
3773 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
3774 else
3775 {
3776 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
3777 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
3778 }
3779
3780 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3781 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3782 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
3783 if (fGstRegShadows)
3784 {
3785 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3786 | RT_BIT_32(idxRegNew);
3787 while (fGstRegShadows)
3788 {
3789 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3790 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3791
3792 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
3793 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
3794 }
3795 }
3796
3797 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3798 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3799 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
3800 return off;
3801}
3802
3803
3804/**
3805 * Moves a variable to a different register or spills it onto the stack.
3806 *
3807 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3808 * kinds can easily be recreated if needed later.
3809 *
3810 * @returns The new code buffer position.
3811 * @param pReNative The native recompile state.
3812 * @param off The current code buffer position.
3813 * @param idxVar The variable index.
3814 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3815 * call-volatile registers.
3816 */
3817DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3818 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
3819{
3820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3821 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3822 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3823 Assert(!pVar->fRegAcquired);
3824 Assert(!pVar->fSimdReg);
3825
3826 uint8_t const idxRegOld = pVar->idxReg;
3827 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
3828 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
3829 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3830 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
3831 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
3832 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3833 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
3834 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3835 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3836 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3837
3838 /** @todo Add statistics on this.*/
3839 /** @todo Implement basic variable liveness analysis (python) so variables
3840 * can be freed immediately once no longer used. Otherwise we may be
3841 * trashing registers and stack space for dead variables.
3842 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3843
3844 /*
3845 * First try move it to a different register, as that's cheaper.
3846 */
3847 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3848 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
3849 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
3850 if (fRegs)
3851 {
3852 /* Avoid using shadow registers, if possible. */
3853 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
3854 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
3855 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3856 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
3857 }
3858
3859 /*
3860 * Otherwise we must spill the register onto the stack.
3861 */
3862 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3863 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3864 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3865
3866 if (pVar->cbVar == sizeof(RTUINT128U))
3867 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3868 else
3869 {
3870 Assert(pVar->cbVar == sizeof(RTUINT256U));
3871 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3872 }
3873
3874 pVar->idxReg = UINT8_MAX;
3875 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3876 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
3877 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3878 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3879 return off;
3880}
3881
3882
3883/**
3884 * Called right before emitting a call instruction to move anything important
3885 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
3886 * optionally freeing argument variables.
3887 *
3888 * @returns New code buffer offset, UINT32_MAX on failure.
3889 * @param pReNative The native recompile state.
3890 * @param off The code buffer offset.
3891 * @param cArgs The number of arguments the function call takes.
3892 * It is presumed that the host register part of these have
3893 * been allocated as such already and won't need moving,
3894 * just freeing.
3895 * @param fKeepVars Mask of variables that should keep their register
3896 * assignments. Caller must take care to handle these.
3897 */
3898DECL_HIDDEN_THROW(uint32_t)
3899iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
3900{
3901 Assert(!cArgs); RT_NOREF(cArgs);
3902
3903 /* fKeepVars will reduce this mask. */
3904 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
3905
3906 /*
3907 * Move anything important out of volatile registers.
3908 */
3909 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
3910#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3911 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
3912#endif
3913 ;
3914
3915 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
3916 if (!fSimdRegsToMove)
3917 { /* likely */ }
3918 else
3919 {
3920 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
3921 while (fSimdRegsToMove != 0)
3922 {
3923 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
3924 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
3925
3926 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
3927 {
3928 case kIemNativeWhat_Var:
3929 {
3930 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
3931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3932 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3933 Assert(pVar->idxReg == idxSimdReg);
3934 Assert(pVar->fSimdReg);
3935 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
3936 {
3937 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
3938 idxVar, pVar->enmKind, pVar->idxReg));
3939 if (pVar->enmKind != kIemNativeVarKind_Stack)
3940 pVar->idxReg = UINT8_MAX;
3941 else
3942 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
3943 }
3944 else
3945 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
3946 continue;
3947 }
3948
3949 case kIemNativeWhat_Arg:
3950 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
3951 continue;
3952
3953 case kIemNativeWhat_rc:
3954 case kIemNativeWhat_Tmp:
3955 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
3956 continue;
3957
3958 case kIemNativeWhat_FixedReserved:
3959#ifdef RT_ARCH_ARM64
3960 continue; /* On ARM the upper half of the virtual 256-bit register. */
3961#endif
3962
3963 case kIemNativeWhat_FixedTmp:
3964 case kIemNativeWhat_pVCpuFixed:
3965 case kIemNativeWhat_pCtxFixed:
3966 case kIemNativeWhat_PcShadow:
3967 case kIemNativeWhat_Invalid:
3968 case kIemNativeWhat_End:
3969 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3970 }
3971 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3972 }
3973 }
3974
3975 /*
3976 * Do the actual freeing.
3977 */
3978 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
3979 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
3980 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
3981 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
3982
3983 /* If there are guest register shadows in any call-volatile register, we
3984 have to clear the corresponding guest register masks for each register. */
3985 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
3986 if (fHstSimdRegsWithGstShadow)
3987 {
3988 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3989 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
3990 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
3991 do
3992 {
3993 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
3994 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
3995
3996 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
3997
3998#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3999 /*
4000 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4001 * to call volatile registers).
4002 */
4003 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4004 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4005 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4006#endif
4007 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4008 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4009
4010 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4011 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4012 } while (fHstSimdRegsWithGstShadow != 0);
4013 }
4014
4015 return off;
4016}
4017#endif
4018
4019
4020/**
4021 * Called right before emitting a call instruction to move anything important
4022 * out of call-volatile registers, free and flush the call-volatile registers,
4023 * optionally freeing argument variables.
4024 *
4025 * @returns New code buffer offset, UINT32_MAX on failure.
4026 * @param pReNative The native recompile state.
4027 * @param off The code buffer offset.
4028 * @param cArgs The number of arguments the function call takes.
4029 * It is presumed that the host register part of these have
4030 * been allocated as such already and won't need moving,
4031 * just freeing.
4032 * @param fKeepVars Mask of variables that should keep their register
4033 * assignments. Caller must take care to handle these.
4034 */
4035DECL_HIDDEN_THROW(uint32_t)
4036iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4037{
4038 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4039
4040 /* fKeepVars will reduce this mask. */
4041 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4042
4043 /*
4044 * Move anything important out of volatile registers.
4045 */
4046 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4047 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4048 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4049#ifdef IEMNATIVE_REG_FIXED_TMP0
4050 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4051#endif
4052#ifdef IEMNATIVE_REG_FIXED_TMP1
4053 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4054#endif
4055#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4056 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4057#endif
4058 & ~g_afIemNativeCallRegs[cArgs];
4059
4060 fRegsToMove &= pReNative->Core.bmHstRegs;
4061 if (!fRegsToMove)
4062 { /* likely */ }
4063 else
4064 {
4065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4066 while (fRegsToMove != 0)
4067 {
4068 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4069 fRegsToMove &= ~RT_BIT_32(idxReg);
4070
4071 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4072 {
4073 case kIemNativeWhat_Var:
4074 {
4075 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4076 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4077 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4078 Assert(pVar->idxReg == idxReg);
4079#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4080 Assert(!pVar->fSimdReg);
4081#endif
4082 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4083 {
4084 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4085 idxVar, pVar->enmKind, pVar->idxReg));
4086 if (pVar->enmKind != kIemNativeVarKind_Stack)
4087 pVar->idxReg = UINT8_MAX;
4088 else
4089 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4090 }
4091 else
4092 fRegsToFree &= ~RT_BIT_32(idxReg);
4093 continue;
4094 }
4095
4096 case kIemNativeWhat_Arg:
4097 AssertMsgFailed(("What?!?: %u\n", idxReg));
4098 continue;
4099
4100 case kIemNativeWhat_rc:
4101 case kIemNativeWhat_Tmp:
4102 AssertMsgFailed(("Missing free: %u\n", idxReg));
4103 continue;
4104
4105 case kIemNativeWhat_FixedTmp:
4106 case kIemNativeWhat_pVCpuFixed:
4107 case kIemNativeWhat_pCtxFixed:
4108 case kIemNativeWhat_PcShadow:
4109 case kIemNativeWhat_FixedReserved:
4110 case kIemNativeWhat_Invalid:
4111 case kIemNativeWhat_End:
4112 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4113 }
4114 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4115 }
4116 }
4117
4118 /*
4119 * Do the actual freeing.
4120 */
4121 if (pReNative->Core.bmHstRegs & fRegsToFree)
4122 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4123 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4124 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4125
4126 /* If there are guest register shadows in any call-volatile register, we
4127 have to clear the corresponding guest register masks for each register. */
4128 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4129 if (fHstRegsWithGstShadow)
4130 {
4131 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4132 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4133 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4134 do
4135 {
4136 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4137 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4138
4139 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4140
4141#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4142 /*
4143 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4144 * to call volatile registers).
4145 */
4146 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4147 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4148 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4149#endif
4150
4151 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4152 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4153 } while (fHstRegsWithGstShadow != 0);
4154 }
4155
4156#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4157 /* Now for the SIMD registers, no argument support for now. */
4158 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4159#endif
4160
4161 return off;
4162}
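
/*
 * Illustrative call-emission sketch, not part of the original code (cHlpArgs
 * is a made-up name): the typical sequence around a helper call is to
 * evacuate and flush the call-volatile registers, emit the call, and then
 * restore any guest shadows that lived in volatile registers:
 *
 *     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cHlpArgs);
 *     ... load the argument registers and emit the actual call ...
 *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */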
4163
4164
4165/**
4166 * Flushes a set of guest register shadow copies.
4167 *
4168 * This is usually done after calling a threaded function or a C-implementation
4169 * of an instruction.
4170 *
4171 * @param pReNative The native recompile state.
4172 * @param fGstRegs Set of guest registers to flush.
4173 */
4174DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4175{
4176 /*
4177 * Reduce the mask by what's currently shadowed
4178 */
4179 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4180 fGstRegs &= bmGstRegShadowsOld;
4181 if (fGstRegs)
4182 {
4183 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4184 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4185 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4186 if (bmGstRegShadowsNew)
4187 {
4188 /*
4189 * Partial.
4190 */
4191 do
4192 {
4193 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4194 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4195 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4196 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4197 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4199 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4200#endif
4201
4202 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4203 fGstRegs &= ~fInThisHstReg;
4204 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4205 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4206 if (!fGstRegShadowsNew)
4207 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4208 } while (fGstRegs != 0);
4209 }
4210 else
4211 {
4212 /*
4213 * Clear all.
4214 */
4215 do
4216 {
4217 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4218 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4219 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4220 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4221 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4222#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4223 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4224#endif
4225
4226 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4227 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4228 } while (fGstRegs != 0);
4229 pReNative->Core.bmHstRegsWithGstShadow = 0;
4230 }
4231 }
4232}
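
/*
 * Illustrative usage sketch, not part of the original code: after emitting a
 * call to a helper that may change guest state behind our back, the now stale
 * shadow copies are dropped by guest register mask, e.g. for a helper that
 * only touches the guest PC:
 *
 *     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
 */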
4233
4234
4235/**
4236 * Flushes guest register shadow copies held by a set of host registers.
4237 *
4238 * This is used with the TLB lookup code for ensuring that we don't carry on
4239 * with any guest shadows in volatile registers, as these will get corrupted by
4240 * a TLB miss.
4241 *
4242 * @param pReNative The native recompile state.
4243 * @param fHstRegs Set of host registers to flush guest shadows for.
4244 */
4245DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4246{
4247 /*
4248 * Reduce the mask by what's currently shadowed.
4249 */
4250 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4251 fHstRegs &= bmHstRegsWithGstShadowOld;
4252 if (fHstRegs)
4253 {
4254 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4255 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4256 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4257 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4258 if (bmHstRegsWithGstShadowNew)
4259 {
4260 /*
4261 * Partial (likely).
4262 */
4263 uint64_t fGstShadows = 0;
4264 do
4265 {
4266 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4267 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4268 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4269 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4270#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4271 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4272#endif
4273
4274 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4275 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4276 fHstRegs &= ~RT_BIT_32(idxHstReg);
4277 } while (fHstRegs != 0);
4278 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4279 }
4280 else
4281 {
4282 /*
4283 * Clear all.
4284 */
4285 do
4286 {
4287 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4288 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4289 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4290 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4291#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4292 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4293#endif
4294
4295 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4296 fHstRegs &= ~RT_BIT_32(idxHstReg);
4297 } while (fHstRegs != 0);
4298 pReNative->Core.bmGstRegShadows = 0;
4299 }
4300 }
4301}
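/*
 * A minimal, illustrative sketch (not a verbatim call site): before emitting a
 * TLB lookup whose miss path calls a helper, drop any guest shadows recorded
 * for the call-volatile GPRs, since their contents won't survive that call.
 *
 *     iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */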
4302
4303
4304/**
4305 * Restores guest shadow copies in volatile registers.
4306 *
4307 * This is used after calling a helper function (think TLB miss) to restore the
4308 * register state of volatile registers.
4309 *
4310 * @param pReNative The native recompile state.
4311 * @param off The code buffer offset.
4312 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4313 * be active (allocated) w/o asserting. Hack.
4314 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4315 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4316 */
4317DECL_HIDDEN_THROW(uint32_t)
4318iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4319{
4320 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4321 if (fHstRegs)
4322 {
4323 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4324 do
4325 {
4326 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4327
4328 /* It's not fatal if a register is active holding a variable that is
4329 shadowing a guest register, ASSUMING all pending guest register
4330 writes were flushed prior to the helper call. However, we'll be
4331 emitting duplicate restores, so it wastes code space. */
4332 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4333 RT_NOREF(fHstRegsActiveShadows);
4334
4335 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4336#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4337 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4338#endif
4339 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4340 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4342
4343 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4344 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4345
4346 fHstRegs &= ~RT_BIT_32(idxHstReg);
4347 } while (fHstRegs != 0);
4348 }
4349 return off;
4350}
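/*
 * Illustrative post-call sequence, assuming all pending guest register writes
 * were flushed before the helper call (the variable save/restore halves are the
 * functions referenced by the @see above; their signatures are not shown here):
 *
 *     // ... emit the helper call; volatile host registers now hold garbage ...
 *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0); // 0 = fHstRegsActiveShadows
 */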
4351
4352
4353
4354
4355/*********************************************************************************************************************************
4356* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4357*********************************************************************************************************************************/
4358#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4359
4360/**
4361 * Info about shadowed guest SIMD register values.
4362 * @see IEMNATIVEGSTSIMDREG
4363 */
4364static struct
4365{
4366 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4367 uint32_t offXmm;
4368 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4369 uint32_t offYmm;
4370 /** Name (for logging). */
4371 const char *pszName;
4372} const g_aGstSimdShadowInfo[] =
4373{
4374#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4375 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4376 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4377 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4378 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4379 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4380 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4381 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4382 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4383 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4384 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4385 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4386 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4387 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4388 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4389 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4390 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4391 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4392#undef CPUMCTX_OFF_AND_SIZE
4393};
4394AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4395
4396
4397/**
4398 * Frees a temporary SIMD register.
4399 *
4400 * Any shadow copies of guest registers assigned to the host register will not
4401 * be flushed by this operation.
4402 */
4403DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4404{
4405 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4406 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4407 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4408 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4409 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4410}
4411
4412
4413/**
4414 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
4415 *
4416 * @returns New code buffer offset.
4417 * @param pReNative The native recompile state.
4418 * @param off Current code buffer position.
4419 * @param enmGstSimdReg The guest SIMD register to flush.
4420 */
4421DECL_HIDDEN_THROW(uint32_t)
4422iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4423{
4424 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4425
4426 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4427 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4428 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4429 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4430
4431 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4432 {
4433 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4434 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4435 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4436 }
4437
4438 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4439 {
4440 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4441 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4442 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4443 }
4444
4445 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4446 return off;
4447}
4448
4449
4450/**
4451 * Flush the given set of guest SIMD registers if marked as dirty.
4452 *
4453 * @returns New code buffer offset.
4454 * @param pReNative The native recompile state.
4455 * @param off Current code buffer position.
4456 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4457 */
4458DECL_HIDDEN_THROW(uint32_t)
4459iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4460{
4461 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4462 & fFlushGstSimdReg;
4463 if (bmGstSimdRegShadowDirty)
4464 {
4465# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4466 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4467 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4468# endif
4469
4470 do
4471 {
4472 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4473 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4474 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4475 } while (bmGstSimdRegShadowDirty);
4476 }
4477
4478 return off;
4479}
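/*
 * Illustrative only: flush every dirty guest SIMD register using the full
 * mask, or just a single register by passing the corresponding bit.
 *
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1)));
 */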
4480
4481
4482#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4483/**
4484 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4485 *
4486 * @returns New code buffer offset.
4487 * @param pReNative The native recompile state.
4488 * @param off Current code buffer position.
4489 * @param idxHstSimdReg The host SIMD register.
4490 *
4491 * @note This doesn't do any unshadowing of guest registers from the host register.
4492 */
4493DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4494{
4495 /* We need to flush any pending guest register writes this host register shadows. */
4496 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4497 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4498 if (bmGstSimdRegShadowDirty)
4499 {
4500# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4501 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4502 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4503# endif
4504
4505 do
4506 {
4507 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4508 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4509 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4510 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4511 } while (bmGstSimdRegShadowDirty);
4512 }
4513
4514 return off;
4515}
4516#endif
4517
4518
4519/**
4520 * Locate a register, possibly freeing one up.
4521 *
4522 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4523 * failed.
4524 *
4525 * @returns Host register number on success. Returns UINT8_MAX if no registers
4526 * found, the caller is supposed to deal with this and raise an
4527 * allocation type specific status code (if desired).
4528 *
4529 * @throws VBox status code if we run into trouble spilling a variable or
4530 * recording debug info. Does NOT throw anything if we're out of
4531 * registers, though.
4532 */
4533static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4534 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4535{
4536 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4537 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4538 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4539
4540 /*
4541 * Try a freed register that's shadowing a guest register.
4542 */
4543 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4544 if (fRegs)
4545 {
4546 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4547
4548#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4549 /*
4550 * When we have liveness information, we use it to kick out all shadowed
4551 * guest registers that will not be needed any more in this TB. If we're
4552 * lucky, this may prevent us from ending up here again.
4553 *
4554 * Note! We must consider the previous entry here so we don't free
4555 * anything that the current threaded function requires (current
4556 * entry is produced by the next threaded function).
4557 */
4558 uint32_t const idxCurCall = pReNative->idxCurCall;
4559 if (idxCurCall > 0)
4560 {
4561 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4562
4563# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4564 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4565 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4566 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
4567#else
4568 /* Construct a mask of the registers not in the read or write state.
4569 Note! We could skip writes, if they aren't from us, as this is just
4570 a hack to prevent trashing registers that have just been written
4571 or will be written when we retire the current instruction. */
4572 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4573 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4574 & IEMLIVENESSBIT_MASK;
4575#endif
4576 /* If it matches any shadowed registers. */
4577 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4578 {
4579 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4580 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4581 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4582
4583 /* See if we've got any unshadowed registers we can return now. */
4584 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4585 if (fUnshadowedRegs)
4586 {
4587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4588 return (fPreferVolatile
4589 ? ASMBitFirstSetU32(fUnshadowedRegs)
4590 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4591 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4592 - 1;
4593 }
4594 }
4595 }
4596#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4597
4598 unsigned const idxReg = (fPreferVolatile
4599 ? ASMBitFirstSetU32(fRegs)
4600 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4601 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4602 - 1;
4603
4604 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4605 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4606 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4607 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4608
4609 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4610 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4611
4612 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4613 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4614 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4615 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4616 return idxReg;
4617 }
4618
4619 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4620
4621 /*
4622 * Try free up a variable that's in a register.
4623 *
4624 * We do two rounds here, first evacuating variables we don't need to be
4625 * saved on the stack, then in the second round move things to the stack.
4626 */
4627 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4628 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4629 {
4630 uint32_t fVars = pReNative->Core.bmVars;
4631 while (fVars)
4632 {
4633 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4634 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4635 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
4636 continue;
4637
4638 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4639 && (RT_BIT_32(idxReg) & fRegMask)
4640 && ( iLoop == 0
4641 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4642 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4643 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4644 {
4645 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4646 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4647 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4648 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4649 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4650 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4651
4652 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4653 {
4654 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4655 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4656 }
4657
4658 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4659 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4660
4661 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4662 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4663 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4664 return idxReg;
4665 }
4666 fVars &= ~RT_BIT_32(idxVar);
4667 }
4668 }
4669
4670 AssertFailed();
4671 return UINT8_MAX;
4672}
4673
4674
4675/**
4676 * Flushes a set of guest SIMD register shadow copies.
4677 *
4678 * This is usually done after calling a threaded function or a C-implementation
4679 * of an instruction.
4680 *
4681 * @param pReNative The native recompile state.
4682 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4683 */
4684DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4685{
4686 /*
4687 * Reduce the mask by what's currently shadowed
4688 */
4689 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4690 fGstSimdRegs &= bmGstSimdRegShadows;
4691 if (fGstSimdRegs)
4692 {
4693 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4694 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4695 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4696 if (bmGstSimdRegShadowsNew)
4697 {
4698 /*
4699 * Partial.
4700 */
4701 do
4702 {
4703 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4704 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4705 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4706 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4707 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4708 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4709
4710 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4711 fGstSimdRegs &= ~fInThisHstReg;
4712 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4713 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4714 if (!fGstRegShadowsNew)
4715 {
4716 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4717 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4718 }
4719 } while (fGstSimdRegs != 0);
4720 }
4721 else
4722 {
4723 /*
4724 * Clear all.
4725 */
4726 do
4727 {
4728 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4729 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4730 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4731 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4732 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4733 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4734
4735 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4736 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4737 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4738 } while (fGstSimdRegs != 0);
4739 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
4740 }
4741 }
4742}
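/*
 * Illustrative only: after a call that may have modified the guest SIMD state
 * behind our back, drop all guest SIMD shadow associations (or a subset by
 * passing a narrower mask).
 *
 *     iemNativeSimdRegFlushGuestShadows(pReNative, UINT64_MAX);
 */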
4743
4744
4745/**
4746 * Allocates a temporary host SIMD register.
4747 *
4748 * This may emit code to save register content onto the stack in order to free
4749 * up a register.
4750 *
4751 * @returns The host register number; throws VBox status code on failure,
4752 * so no need to check the return value.
4753 * @param pReNative The native recompile state.
4754 * @param poff Pointer to the variable with the code buffer position.
4755 * This will be update if we need to move a variable from
4756 * register to stack in order to satisfy the request.
4757 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4758 * registers (@c true, default) or the other way around
4759 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4760 */
4761DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4762{
4763 /*
4764 * Try find a completely unused register, preferably a call-volatile one.
4765 */
4766 uint8_t idxSimdReg;
4767 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4768 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4769 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
4770 if (fRegs)
4771 {
4772 if (fPreferVolatile)
4773 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4774 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4775 else
4776 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4777 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4778 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4779 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4780
4781 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4782 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4783 }
4784 else
4785 {
4786 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
4787 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4788 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4789 }
4790
4791 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4792 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4793}
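/*
 * Illustrative allocation/free pairing; the work done with the register is a
 * placeholder:
 *
 *     uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off, true); // true = fPreferVolatile
 *     // ... emit instructions using idxSimdRegTmp ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 */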
4794
4795
4796/**
4797 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
4798 * registers.
4799 *
4800 * @returns The host register number; throws VBox status code on failure,
4801 * so no need to check the return value.
4802 * @param pReNative The native recompile state.
4803 * @param poff Pointer to the variable with the code buffer position.
4804 * This will be updated if we need to move a variable from
4805 * register to stack in order to satisfy the request.
4806 * @param fRegMask Mask of acceptable registers.
4807 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4808 * registers (@c true, default) or the other way around
4809 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4810 */
4811DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4812 bool fPreferVolatile /*= true*/)
4813{
4814 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4815 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4816
4817 /*
4818 * Try find a completely unused register, preferably a call-volatile one.
4819 */
4820 uint8_t idxSimdReg;
4821 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4822 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4823 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
4824 & fRegMask;
4825 if (fRegs)
4826 {
4827 if (fPreferVolatile)
4828 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4829 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4830 else
4831 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4832 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4833 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4834 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4835
4836 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4837 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4838 }
4839 else
4840 {
4841 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4842 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4843 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4844 }
4845
4846 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4847 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4848}
4849
4850
4851/**
4852 * Sets the indicator for which part of the given SIMD register has valid data loaded.
4853 *
4854 * @param pReNative The native recompile state.
4855 * @param idxHstSimdReg The host SIMD register to update the state for.
4856 * @param enmLoadSz The load size to set.
4857 */
4858DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
4859 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
4860{
4861 /* Everything valid already? -> nothing to do. */
4862 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4863 return;
4864
4865 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
4866 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
4867 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
4868 {
4869 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
4870 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
4871 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
4872 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
4873 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
4874 }
4875}
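/*
 * Resulting enmLoaded state in table form (rows: current state, columns: new
 * enmLoadSz); the combinations marked n/a trip the assertion above:
 *
 *     current \ new   Low128    High128   256
 *     Invalid         Low128    High128   256
 *     Low128          Low128    256       n/a
 *     High128         256       High128   n/a
 *     256             256       256       256
 */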
4876
4877
4878static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
4879 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
4880{
4881 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
4882 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
4883 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4884 {
4885# ifdef RT_ARCH_ARM64
4886 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
4887 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
4888# endif
4889
4890 if (idxHstSimdRegDst != idxHstSimdRegSrc)
4891 {
4892 switch (enmLoadSzDst)
4893 {
4894 case kIemNativeGstSimdRegLdStSz_256:
4895 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4896 break;
4897 case kIemNativeGstSimdRegLdStSz_Low128:
4898 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4899 break;
4900 case kIemNativeGstSimdRegLdStSz_High128:
4901 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4902 break;
4903 default:
4904 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4905 }
4906
4907 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
4908 }
4909 }
4910 else
4911 {
4912 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
4913 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
4914 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
4915 }
4916
4917 return off;
4918}
4919
4920
4921/**
4922 * Allocates a temporary host SIMD register for keeping a guest
4923 * SIMD register value.
4924 *
4925 * Since we may already have a register holding the guest register value,
4926 * code will be emitted to do the loading if that's not the case. Code may also
4927 * be emitted if we have to free up a register to satisfy the request.
4928 *
4929 * @returns The host register number; throws VBox status code on failure, so no
4930 * need to check the return value.
4931 * @param pReNative The native recompile state.
4932 * @param poff Pointer to the variable with the code buffer
4933 * position. This will be updated if we need to move a
4934 * variable from register to stack in order to satisfy
4935 * the request.
4936 * @param enmGstSimdReg The guest SIMD register that is to be updated.
4937 * @param enmIntendedUse How the caller will be using the host register.
4938 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4939 * register is okay (default). The ASSUMPTION here is
4940 * that the caller has already flushed all volatile
4941 * registers, so this is only applied if we allocate a
4942 * new register.
4943 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4944 */
4945DECL_HIDDEN_THROW(uint8_t)
4946iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
4947 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4948 bool fNoVolatileRegs /*= false*/)
4949{
4950 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
4951#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
4952 AssertMsg( pReNative->idxCurCall == 0
4953 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4954 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4955 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4956 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4957 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
4958 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
4959#endif
4960#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4961 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4962#endif
4963 uint32_t const fRegMask = !fNoVolatileRegs
4964 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
4965 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4966
4967 /*
4968 * First check if the guest register value is already in a host register.
4969 */
4970 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
4971 {
4972 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4973 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4974 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
4975 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
4976
4977 /* It's not supposed to be allocated... */
4978 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
4979 {
4980 /*
4981 * If the register will trash the guest shadow copy, try find a
4982 * completely unused register we can use instead. If that fails,
4983 * we need to disassociate the host reg from the guest reg.
4984 */
4985 /** @todo would be nice to know if preserving the register is in any way helpful. */
4986 /* If the purpose is calculations, try duplicate the register value as
4987 we'll be clobbering the shadow. */
4988 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4989 && ( ~pReNative->Core.bmHstSimdRegs
4990 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4991 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
4992 {
4993 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
4994
4995 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
4996
4997 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
4998 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
4999 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5000 idxSimdReg = idxRegNew;
5001 }
5002 /* If the current register matches the restrictions, go ahead and allocate
5003 it for the caller. */
5004 else if (fRegMask & RT_BIT_32(idxSimdReg))
5005 {
5006 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5007 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5008 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5009 {
5010 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5011 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5012 else
5013 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5014 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5015 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5016 }
5017 else
5018 {
5019 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5020 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5021 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5022 }
5023 }
5024 /* Otherwise, allocate a register that satisfies the caller and transfer
5025 the shadowing if compatible with the intended use. (This basically
5026 means the call wants a non-volatile register (RSP push/pop scenario).) */
5027 else
5028 {
5029 Assert(fNoVolatileRegs);
5030 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5031 !fNoVolatileRegs
5032 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5033 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5034 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5035 {
5036 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5037 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5038 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5039 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5040 }
5041 else
5042 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5043 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5044 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5045 idxSimdReg = idxRegNew;
5046 }
5047 }
5048 else
5049 {
5050 /*
5051 * Oops. Shadowed guest register already allocated!
5052 *
5053 * Allocate a new register, copy the value and, if updating, the
5054 * guest shadow copy assignment to the new register.
5055 */
5056 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5057 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5058 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5059 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5060
5061 /** @todo share register for readonly access. */
5062 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5063 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5064
5065 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5066 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5067 else
5068 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5069
5070 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5071 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5072 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5073 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5074 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5075 else
5076 {
5077 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5078 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5079 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5080 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5081 }
5082 idxSimdReg = idxRegNew;
5083 }
5084 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5085
5086#ifdef VBOX_STRICT
5087 /* Strict builds: Check that the value is correct. */
5088 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5089 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5090#endif
5091
5092 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5093 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5094 {
5095# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5096 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5097 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5098# endif
5099
5100 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5101 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5102 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5103 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5104 else
5105 {
5106 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5107 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5108 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5109 }
5110 }
5111
5112 return idxSimdReg;
5113 }
5114
5115 /*
5116 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5117 */
5118 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5119
5120 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5121 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5122 else
5123 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5124
5125 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5126 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5127
5128 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5129 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5130 {
5131# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5132 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5133 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5134# endif
5135
5136 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5137 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5138 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5139 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5140 else
5141 {
5142 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5143 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5144 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5145 }
5146 }
5147
5148 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5149 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5150
5151 return idxRegNew;
5152}
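/*
 * Illustrative read-modify-write pattern; iYRegDst is a placeholder for a
 * guest register index known to the caller. The allocator marks the low half
 * dirty for ForUpdate, so a later flush writes the result back to CPUMCTX.
 *
 *     uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                        IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
 *                                                                        kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                        kIemNativeGstRegUse_ForUpdate,
 *                                                                        false); // false = fNoVolatileRegs
 *     // ... emit instructions modifying idxSimdReg ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */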
5153
5154
5155/**
5156 * Flushes guest SIMD register shadow copies held by a set of host registers.
5157 *
5158 * This is used when calling an external helper to ensure that we don't carry on
5159 * with any guest shadows in volatile registers, as these will get corrupted by the helper.
5160 *
5161 * @param pReNative The native recompile state.
5162 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5163 */
5164DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5165{
5166 /*
5167 * Reduce the mask by what's currently shadowed.
5168 */
5169 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5170 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5171 if (fHstSimdRegs)
5172 {
5173 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5174 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5175 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5176 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5177 if (bmHstSimdRegsWithGstShadowNew)
5178 {
5179 /*
5180 * Partial (likely).
5181 */
5182 uint64_t fGstShadows = 0;
5183 do
5184 {
5185 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5186 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5187 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5188 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5189 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5190 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5191
5192 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5193 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5194 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5195 } while (fHstSimdRegs != 0);
5196 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5197 }
5198 else
5199 {
5200 /*
5201 * Clear all.
5202 */
5203 do
5204 {
5205 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5206 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5207 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5208 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5209 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5210 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5211
5212 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5213 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5214 } while (fHstSimdRegs != 0);
5215 pReNative->Core.bmGstSimdRegShadows = 0;
5216 }
5217 }
5218}
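/*
 * Illustrative only: drop guest SIMD shadows from the call-volatile host SIMD
 * registers before emitting an external helper call.
 *
 *     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */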
5219#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5220
5221
5222
5223/*********************************************************************************************************************************
5224* Code emitters for flushing pending guest register writes and sanity checks *
5225*********************************************************************************************************************************/
5226
5227#ifdef VBOX_STRICT
5228/**
5229 * Does internal register allocator sanity checks.
5230 */
5231DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5232{
5233 /*
5234 * Iterate host registers building a guest shadowing set.
5235 */
5236 uint64_t bmGstRegShadows = 0;
5237 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5238 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5239 while (bmHstRegsWithGstShadow)
5240 {
5241 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5242 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5243 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5244
5245 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5246 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5247 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5248 bmGstRegShadows |= fThisGstRegShadows;
5249 while (fThisGstRegShadows)
5250 {
5251 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5252 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5253 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5254 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5255 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5256 }
5257 }
5258 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5259 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5260 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5261
5262 /*
5263 * Now the other way around, checking the guest to host index array.
5264 */
5265 bmHstRegsWithGstShadow = 0;
5266 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5267 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5268 while (bmGstRegShadows)
5269 {
5270 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5271 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5272 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5273
5274 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5275 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5276 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5277 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5278 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5279 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5280 }
5281 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5282 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5283 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5284}
5285#endif /* VBOX_STRICT */
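/*
 * Illustrative only: strict builds can invoke the check above after operations
 * that rewire shadowing, e.g.:
 *
 *     #ifdef VBOX_STRICT
 *     iemNativeRegAssertSanity(pReNative);
 *     #endif
 */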
5286
5287
5288/**
5289 * Flushes any delayed guest register writes.
5290 *
5291 * This must be called prior to calling CImpl functions and any helpers that use
5292 * the guest state (like raising exceptions) and such.
5293 *
5294 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5295 * the caller if it wishes to do so.
5296 */
5297DECL_HIDDEN_THROW(uint32_t)
5298iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5299{
5300#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5301 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5302 off = iemNativeEmitPcWriteback(pReNative, off);
5303#else
5304 RT_NOREF(pReNative, fGstShwExcept);
5305#endif
5306
5307#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5308 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5309#endif
5310
5311#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5312 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5313#endif
5314
5315 return off;
5316}
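/*
 * Illustrative only: flush every pending guest register and SIMD register
 * write before emitting a CImpl-style helper call (no exceptions).
 *
 *     off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0, 0); // fGstShwExcept = 0, fGstSimdShwExcept = 0
 */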
5317
5318
5319#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5320/**
5321 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5322 */
5323DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5324{
5325 Assert(pReNative->Core.offPc);
5326# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5327 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5328 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5329# endif
5330
5331# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5332 /* Allocate a temporary PC register. */
5333 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5334
5335 /* Perform the addition and store the result. */
5336 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5337 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5338
5339 /* Free but don't flush the PC register. */
5340 iemNativeRegFreeTmp(pReNative, idxPcReg);
5341# else
5342 /* Compare the shadow with the context value, they should match. */
5343 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5344 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5345# endif
5346
5347 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5348 pReNative->Core.offPc = 0;
5349 pReNative->Core.cInstrPcUpdateSkipped = 0;
5350
5351 return off;
5352}
5353#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
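/*
 * Worked example: if three instructions totalling 6 bytes were recompiled
 * without an intervening RIP write-back, Core.offPc is 6 and
 * Core.cInstrPcUpdateSkipped is 3; the code emitted above adds 6 to
 * CPUMCTX::rip, bumps the statistics counter by 3 and resets both fields.
 */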
5354
5355
5356/*********************************************************************************************************************************
5357* Code Emitters (larger snippets) *
5358*********************************************************************************************************************************/
5359
5360/**
5361 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5362 * extending to 64-bit width.
5363 *
5364 * @returns New code buffer offset on success, UINT32_MAX on failure.
5365 * @param pReNative The native recompile state.
5366 * @param off The current code buffer position.
5367 * @param idxHstReg The host register to load the guest register value into.
5368 * @param enmGstReg The guest register to load.
5369 *
5370 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5371 * that is something the caller needs to do if applicable.
5372 */
5373DECL_HIDDEN_THROW(uint32_t)
5374iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5375{
5376 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5377 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5378
5379 switch (g_aGstShadowInfo[enmGstReg].cb)
5380 {
5381 case sizeof(uint64_t):
5382 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5383 case sizeof(uint32_t):
5384 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5385 case sizeof(uint16_t):
5386 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5387#if 0 /* not present in the table. */
5388 case sizeof(uint8_t):
5389 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5390#endif
5391 default:
5392 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5393 }
5394}
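/*
 * Illustrative only: load the guest PC value into the fixed temporary
 * register, as done (with the register under test) by the strict-build value
 * check further down.
 *
 *     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 */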
5395
5396
5397#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5398/**
5399 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5400 *
5401 * @returns New code buffer offset on success, UINT32_MAX on failure.
5402 * @param pReNative The recompiler state.
5403 * @param off The current code buffer position.
5404 * @param idxHstSimdReg The host register to load the guest register value into.
5405 * @param enmGstSimdReg The guest register to load.
5406 * @param enmLoadSz The load size of the register.
5407 *
5408 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5409 * that is something the caller needs to do if applicable.
5410 */
5411DECL_HIDDEN_THROW(uint32_t)
5412iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5413 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5414{
5415 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5416
5417 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5418 switch (enmLoadSz)
5419 {
5420 case kIemNativeGstSimdRegLdStSz_256:
5421 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5422 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5423 case kIemNativeGstSimdRegLdStSz_Low128:
5424 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5425 case kIemNativeGstSimdRegLdStSz_High128:
5426 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5427 default:
5428 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5429 }
5430}
5431#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
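/*
 * Illustrative only: load the full 256-bit ymm0 shadow into a previously
 * allocated temporary host SIMD register (idxHstSimdReg is a placeholder).
 *
 *     off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdReg,
 *                                                        IEMNATIVEGSTSIMDREG_SIMD(0), kIemNativeGstSimdRegLdStSz_256);
 */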
5432
5433#ifdef VBOX_STRICT
5434
5435/**
5436 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5437 *
5438 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5439 * Trashes EFLAGS on AMD64.
5440 */
5441DECL_HIDDEN_THROW(uint32_t)
5442iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5443{
5444# ifdef RT_ARCH_AMD64
5445 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5446
5447 /* rol reg64, 32 */
5448 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5449 pbCodeBuf[off++] = 0xc1;
5450 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5451 pbCodeBuf[off++] = 32;
5452
5453 /* test reg32, ffffffffh */
5454 if (idxReg >= 8)
5455 pbCodeBuf[off++] = X86_OP_REX_B;
5456 pbCodeBuf[off++] = 0xf7;
5457 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5458 pbCodeBuf[off++] = 0xff;
5459 pbCodeBuf[off++] = 0xff;
5460 pbCodeBuf[off++] = 0xff;
5461 pbCodeBuf[off++] = 0xff;
5462
5463 /* je/jz +1 */
5464 pbCodeBuf[off++] = 0x74;
5465 pbCodeBuf[off++] = 0x01;
5466
5467 /* int3 */
5468 pbCodeBuf[off++] = 0xcc;
5469
5470 /* rol reg64, 32 */
5471 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5472 pbCodeBuf[off++] = 0xc1;
5473 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5474 pbCodeBuf[off++] = 32;
5475
5476# elif defined(RT_ARCH_ARM64)
5477 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5478 /* lsr tmp0, reg64, #32 */
5479 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5480 /* cbz tmp0, +1 */
5481 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5482 /* brk #0x1100 */
5483 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5484
5485# else
5486# error "Port me!"
5487# endif
5488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5489 return off;
5490}
5491
5492
5493/**
5494 * Emitting code that checks that the content of register @a idxReg is the same
5495 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5496 * instruction if that's not the case.
5497 *
5498 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5499 * Trashes EFLAGS on AMD64.
5500 */
5501DECL_HIDDEN_THROW(uint32_t)
5502iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5503{
5504#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5505 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5506 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5507 return off;
5508#endif
5509
5510# ifdef RT_ARCH_AMD64
5511 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5512
5513 /* cmp reg, [mem] */
5514 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5515 {
5516 if (idxReg >= 8)
5517 pbCodeBuf[off++] = X86_OP_REX_R;
5518 pbCodeBuf[off++] = 0x38;
5519 }
5520 else
5521 {
5522 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5523 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5524 else
5525 {
5526 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5527 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5528 else
5529 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5530 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5531 if (idxReg >= 8)
5532 pbCodeBuf[off++] = X86_OP_REX_R;
5533 }
5534 pbCodeBuf[off++] = 0x39;
5535 }
5536 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5537
5538 /* je/jz +1 */
5539 pbCodeBuf[off++] = 0x74;
5540 pbCodeBuf[off++] = 0x01;
5541
5542 /* int3 */
5543 pbCodeBuf[off++] = 0xcc;
5544
5545 /* For values smaller than the register size, we must check that the rest
5546 of the register is all zeros. */
5547 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5548 {
5549 /* test reg64, imm32 */
5550 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5551 pbCodeBuf[off++] = 0xf7;
5552 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5553 pbCodeBuf[off++] = 0;
5554 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5555 pbCodeBuf[off++] = 0xff;
5556 pbCodeBuf[off++] = 0xff;
5557
5558 /* je/jz +1 */
5559 pbCodeBuf[off++] = 0x74;
5560 pbCodeBuf[off++] = 0x01;
5561
5562 /* int3 */
5563 pbCodeBuf[off++] = 0xcc;
5564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5565 }
5566 else
5567 {
5568 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5569 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5570             off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5571 }
5572
5573# elif defined(RT_ARCH_ARM64)
5574 /* mov TMP0, [gstreg] */
5575 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5576
5577 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5578 /* sub tmp0, tmp0, idxReg */
5579 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5580 /* cbz tmp0, +1 */
5581 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5582 /* brk #0x1000+enmGstReg */
5583 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5584 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5585
5586# else
5587# error "Port me!"
5588# endif
5589 return off;
5590}
5591
5592
5593# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5594# ifdef RT_ARCH_AMD64
5595/**
5596 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
5597 */
5598DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5599{
5600 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5601 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5602 if (idxSimdReg >= 8)
5603 pbCodeBuf[off++] = X86_OP_REX_R;
5604 pbCodeBuf[off++] = 0x0f;
5605 pbCodeBuf[off++] = 0x38;
5606 pbCodeBuf[off++] = 0x29;
5607 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5608
5609 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5610 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5611 pbCodeBuf[off++] = X86_OP_REX_W
5612 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5613 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5614 pbCodeBuf[off++] = 0x0f;
5615 pbCodeBuf[off++] = 0x3a;
5616 pbCodeBuf[off++] = 0x16;
5617 pbCodeBuf[off++] = 0xeb;
5618 pbCodeBuf[off++] = 0x00;
5619
5620 /* cmp tmp0, 0xffffffffffffffff. */
5621 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5622 pbCodeBuf[off++] = 0x83;
5623 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5624 pbCodeBuf[off++] = 0xff;
5625
5626 /* je/jz +1 */
5627 pbCodeBuf[off++] = 0x74;
5628 pbCodeBuf[off++] = 0x01;
5629
5630 /* int3 */
5631 pbCodeBuf[off++] = 0xcc;
5632
5633 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5634 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5635 pbCodeBuf[off++] = X86_OP_REX_W
5636 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5637 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5638 pbCodeBuf[off++] = 0x0f;
5639 pbCodeBuf[off++] = 0x3a;
5640 pbCodeBuf[off++] = 0x16;
5641 pbCodeBuf[off++] = 0xeb;
5642 pbCodeBuf[off++] = 0x01;
5643
5644 /* cmp tmp0, 0xffffffffffffffff. */
5645 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5646 pbCodeBuf[off++] = 0x83;
5647 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5648 pbCodeBuf[off++] = 0xff;
5649
5650 /* je/jz +1 */
5651 pbCodeBuf[off++] = 0x74;
5652 pbCodeBuf[off++] = 0x01;
5653
5654 /* int3 */
5655 pbCodeBuf[off++] = 0xcc;
5656
5657 return off;
5658}
5659# endif
5660
5661
5662/**
5663 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
5664 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5665 * instruction if that's not the case.
5666 *
5667 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5668 * Trashes EFLAGS on AMD64.
5669 */
5670DECL_HIDDEN_THROW(uint32_t)
5671iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5672 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5673{
5674    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5675 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5676 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5677 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5678 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5679 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5680 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5681 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5682 return off;
5683
5684# ifdef RT_ARCH_AMD64
5685 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5686 {
5687 /* movdqa vectmp0, idxSimdReg */
5688 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5689
5690 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5691
5692 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5693 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5694 }
5695
5696 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5697 {
5698        /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5699 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5700
5701 /* vextracti128 vectmp0, idxSimdReg, 1 */
5702 pbCodeBuf[off++] = X86_OP_VEX3;
5703 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5704 | X86_OP_VEX3_BYTE1_X
5705 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5706 | 0x03; /* Opcode map */
5707 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5708 pbCodeBuf[off++] = 0x39;
5709 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5710 pbCodeBuf[off++] = 0x01;
5711
5712 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5713 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5714 }
5715# elif defined(RT_ARCH_ARM64)
5716 /* mov vectmp0, [gstreg] */
5717 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5718
5719 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5720 {
5721 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5722 /* eor vectmp0, vectmp0, idxSimdReg */
5723 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5724 /* uaddlv vectmp0, vectmp0.16B */
5725 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
5726 /* umov tmp0, vectmp0.H[0] */
5727 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5728 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5729 /* cbz tmp0, +1 */
5730 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5731 /* brk #0x1000+enmGstReg */
5732 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5733 }
5734
5735 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5736 {
5737 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5738 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
5739 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
5740 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
5741 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
5742 /* umov tmp0, (vectmp0 + 1).H[0] */
5743 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
5744 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5745 /* cbz tmp0, +1 */
5746 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5747 /* brk #0x1000+enmGstReg */
5748 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5749 }
5750
5751# else
5752# error "Port me!"
5753# endif
5754
5755 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5756 return off;
5757}
5758# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5759
5760
5761/**
5762 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
5763 * important bits.
5764 *
5765 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5766 * Trashes EFLAGS on AMD64.
5767 */
5768DECL_HIDDEN_THROW(uint32_t)
5769iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5770{
5771 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5772 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5773 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5774 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5775
5776#ifdef RT_ARCH_AMD64
5777 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5778
5779 /* je/jz +1 */
5780 pbCodeBuf[off++] = 0x74;
5781 pbCodeBuf[off++] = 0x01;
5782
5783 /* int3 */
5784 pbCodeBuf[off++] = 0xcc;
5785
5786# elif defined(RT_ARCH_ARM64)
5787 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5788
5789 /* b.eq +1 */
5790 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5791 /* brk #0x2000 */
5792 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5793
5794# else
5795# error "Port me!"
5796# endif
5797 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5798
5799 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5800 return off;
5801}
5802
5803#endif /* VBOX_STRICT */
5804
5805
5806#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5807/**
5808 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
5809 */
5810DECL_HIDDEN_THROW(uint32_t)
5811iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
5812{
5813 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
5814
5815 fEflNeeded &= X86_EFL_STATUS_BITS;
5816 if (fEflNeeded)
5817 {
5818# ifdef RT_ARCH_AMD64
5819 /* test dword [pVCpu + offVCpu], imm32 */
5820 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5821 if (fEflNeeded <= 0xff)
5822 {
5823 pCodeBuf[off++] = 0xf6;
5824 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5825 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5826 }
5827 else
5828 {
5829 pCodeBuf[off++] = 0xf7;
5830 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5831 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5832 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
5833 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
5834 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
5835 }
5836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5837
5838# else
5839 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5840 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
5841 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
5842# ifdef RT_ARCH_ARM64
5843 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
5844 off = iemNativeEmitBrk(pReNative, off, 0x7777);
5845# else
5846# error "Port me!"
5847# endif
5848 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5849# endif
5850 }
5851 return off;
5852}
5853#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
5854
5855
5856/**
5857  * Emits code for checking the return code of a call and rcPassUp, returning
5858  * from the code if either is non-zero.
5859 */
5860DECL_HIDDEN_THROW(uint32_t)
5861iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5862{
5863#ifdef RT_ARCH_AMD64
5864 /*
5865 * AMD64: eax = call status code.
5866 */
5867
5868 /* edx = rcPassUp */
5869 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5870# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5871 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5872# endif
5873
5874 /* edx = eax | rcPassUp */
5875 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5876 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5877 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5878 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5879
5880 /* Jump to non-zero status return path. */
5881 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5882
5883 /* done. */
5884
5885#elif RT_ARCH_ARM64
5886 /*
5887 * ARM64: w0 = call status code.
5888 */
5889# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5890 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5891# endif
5892 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5893
5894 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5895
5896 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5897
5898 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5899 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5900 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5901
5902#else
5903# error "port me"
5904#endif
5905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5906 RT_NOREF_PV(idxInstr);
5907 return off;
5908}
5909
5910
5911/**
5912 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5913 * raising a \#GP(0) if it isn't.
5914 *
5915  * @returns New code buffer offset; throws VBox status code on error.
5916 * @param pReNative The native recompile state.
5917 * @param off The code buffer offset.
5918 * @param idxAddrReg The host register with the address to check.
5919 * @param idxInstr The current instruction.
5920 */
5921DECL_HIDDEN_THROW(uint32_t)
5922iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5923{
5924 /*
5925 * Make sure we don't have any outstanding guest register writes as we may
5926     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5927 */
5928 off = iemNativeRegFlushPendingWrites(pReNative, off);
5929
5930#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5931 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5932#else
5933 RT_NOREF(idxInstr);
5934#endif
5935
5936#ifdef RT_ARCH_AMD64
5937 /*
5938 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5939 * return raisexcpt();
5940     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5941 */
5942 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5943
5944 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5945 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5946 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5947 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5948 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5949
5950 iemNativeRegFreeTmp(pReNative, iTmpReg);
5951
5952#elif defined(RT_ARCH_ARM64)
5953 /*
5954 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5955 * return raisexcpt();
5956 * ----
5957 * mov x1, 0x800000000000
5958 * add x1, x0, x1
5959 * cmp xzr, x1, lsr 48
5960 * b.ne .Lraisexcpt
5961 */
5962 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5963
5964 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5965 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5966 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5967 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5968
5969 iemNativeRegFreeTmp(pReNative, iTmpReg);
5970
5971#else
5972# error "Port me"
5973#endif
5974 return off;
5975}
5976
5977
5978/**
5979  * Emits code to check that the content of @a idxAddrReg is within the limit
5980 * of CS, raising a \#GP(0) if it isn't.
5981 *
5982 * @returns New code buffer offset; throws VBox status code on error.
5983 * @param pReNative The native recompile state.
5984 * @param off The code buffer offset.
5985 * @param idxAddrReg The host register (32-bit) with the address to
5986 * check.
5987 * @param idxInstr The current instruction.
5988 */
5989DECL_HIDDEN_THROW(uint32_t)
5990iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5991 uint8_t idxAddrReg, uint8_t idxInstr)
5992{
5993 /*
5994 * Make sure we don't have any outstanding guest register writes as we may
5995     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5996 */
5997 off = iemNativeRegFlushPendingWrites(pReNative, off);
5998
5999#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6000 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6001#else
6002 RT_NOREF(idxInstr);
6003#endif
6004
6005 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6006 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6007 kIemNativeGstRegUse_ReadOnly);
6008
6009 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6010 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6011
6012 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6013 return off;
6014}
6015
6016
6017/**
6018 * Emits a call to a CImpl function or something similar.
6019 */
6020DECL_HIDDEN_THROW(uint32_t)
6021iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6022 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6023{
6024 /* Writeback everything. */
6025 off = iemNativeRegFlushPendingWrites(pReNative, off);
6026
6027 /*
6028     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6029 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6030 */
6031 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6032 fGstShwFlush
6033 | RT_BIT_64(kIemNativeGstReg_Pc)
6034 | RT_BIT_64(kIemNativeGstReg_EFlags));
6035 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6036
6037 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6038
6039 /*
6040 * Load the parameters.
6041 */
6042#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6043    /* Special case: the hidden VBOXSTRICTRC return pointer takes the first argument register. */
6044 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6045 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6046 if (cAddParams > 0)
6047 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6048 if (cAddParams > 1)
6049 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6050 if (cAddParams > 2)
6051 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6052 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6053
6054#else
6055 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6056 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6057 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6058 if (cAddParams > 0)
6059 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6060 if (cAddParams > 1)
6061 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6062 if (cAddParams > 2)
6063# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6064 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6065# else
6066 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6067# endif
6068#endif
6069
6070 /*
6071 * Make the call.
6072 */
6073 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6074
6075#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6076 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6077#endif
6078
6079 /*
6080 * Check the status code.
6081 */
6082 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6083}
6084
6085
6086/**
6087 * Emits a call to a threaded worker function.
6088 */
6089DECL_HIDDEN_THROW(uint32_t)
6090iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6091{
6092 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6093
6094 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6095 off = iemNativeRegFlushPendingWrites(pReNative, off);
6096
6097 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6098 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6099
6100#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6101 /* The threaded function may throw / long jmp, so set current instruction
6102 number if we're counting. */
6103 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6104#endif
6105
6106 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6107
6108#ifdef RT_ARCH_AMD64
6109 /* Load the parameters and emit the call. */
6110# ifdef RT_OS_WINDOWS
6111# ifndef VBOXSTRICTRC_STRICT_ENABLED
6112 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6113 if (cParams > 0)
6114 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6115 if (cParams > 1)
6116 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6117 if (cParams > 2)
6118 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6119# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6120 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6121 if (cParams > 0)
6122 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6123 if (cParams > 1)
6124 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6125 if (cParams > 2)
6126 {
6127 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6128 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6129 }
6130 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6131# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6132# else
6133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6134 if (cParams > 0)
6135 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6136 if (cParams > 1)
6137 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6138 if (cParams > 2)
6139 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6140# endif
6141
6142 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6143
6144# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6145 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6146# endif
6147
6148#elif RT_ARCH_ARM64
6149 /*
6150 * ARM64:
6151 */
6152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6153 if (cParams > 0)
6154 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6155 if (cParams > 1)
6156 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6157 if (cParams > 2)
6158 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6159
6160 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6161
6162#else
6163# error "port me"
6164#endif
6165
6166 /*
6167 * Check the status code.
6168 */
6169 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6170
6171 return off;
6172}
6173
6174#ifdef VBOX_WITH_STATISTICS
6175/**
6176 * Emits code to update the thread call statistics.
6177 */
6178DECL_INLINE_THROW(uint32_t)
6179iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6180{
6181 /*
6182 * Update threaded function stats.
6183 */
6184 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6185 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6186# if defined(RT_ARCH_ARM64)
6187 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6188 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6189 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6190 iemNativeRegFreeTmp(pReNative, idxTmp1);
6191 iemNativeRegFreeTmp(pReNative, idxTmp2);
6192# else
6193 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6194# endif
6195 return off;
6196}
6197#endif /* VBOX_WITH_STATISTICS */
6198
6199
6200/**
6201 * Emits the code at the ReturnWithFlags label (returns
6202 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6203 */
6204static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6205{
6206 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6207 if (idxLabel != UINT32_MAX)
6208 {
6209 iemNativeLabelDefine(pReNative, idxLabel, off);
6210
6211#ifdef VBOX_WITH_STATISTICS
6212 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6213 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6214 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2,
6215 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitReturnWithFlags));
6216 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6217 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6218#endif
6219
6220 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6221
6222 /* jump back to the return sequence. */
6223 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6224 }
6225 return off;
6226}
6227
6228
6229/**
6230 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6231 */
6232static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6233{
6234 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6235 if (idxLabel != UINT32_MAX)
6236 {
6237 iemNativeLabelDefine(pReNative, idxLabel, off);
6238
6239#ifdef VBOX_WITH_STATISTICS
6240 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6241 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6242 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2,
6243 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitReturnBreak));
6244 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6245 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6246#endif
6247
6248 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6249
6250 /* jump back to the return sequence. */
6251 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6252 }
6253 return off;
6254}
6255
6256
6257/**
6258 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6259 */
6260static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6261{
6262 /*
6263 * Generate the rc + rcPassUp fiddling code if needed.
6264 */
6265 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6266 if (idxLabel != UINT32_MAX)
6267 {
6268 iemNativeLabelDefine(pReNative, idxLabel, off);
6269
6270 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6271#ifdef RT_ARCH_AMD64
6272# ifdef RT_OS_WINDOWS
6273# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6274 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6275# endif
6276 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6277 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6278# else
6279 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6280 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6281# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6282 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6283# endif
6284# endif
6285# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6286 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6287# endif
6288
6289#else
6290 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6291 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6292 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6293#endif
6294
6295 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6296 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6297 }
6298 return off;
6299}
6300
6301
6302/**
6303 * Emits a standard epilog.
6304 */
6305static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6306{
6307 *pidxReturnLabel = UINT32_MAX;
6308
6309 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6310 off = iemNativeRegFlushPendingWrites(pReNative, off);
6311
6312 /*
6313 * Successful return, so clear the return register (eax, w0).
6314 */
6315    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6316
6317 /*
6318 * Define label for common return point.
6319 */
6320 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6321 *pidxReturnLabel = idxReturn;
6322
6323 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6324
6325 /*
6326 * Restore registers and return.
6327 */
6328#ifdef RT_ARCH_AMD64
6329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6330
6331    /* Reposition rsp at the r15 restore point. */
6332 pbCodeBuf[off++] = X86_OP_REX_W;
6333 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6334 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6335 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6336
6337 /* Pop non-volatile registers and return */
6338 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6339 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6340 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6341 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6342 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6343 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6344 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6345 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6346# ifdef RT_OS_WINDOWS
6347 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6348 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6349# endif
6350 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6351 pbCodeBuf[off++] = 0xc9; /* leave */
6352 pbCodeBuf[off++] = 0xc3; /* ret */
6353 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6354
6355#elif RT_ARCH_ARM64
6356 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6357
6358 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6359 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6360 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6361 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6362 IEMNATIVE_FRAME_VAR_SIZE / 8);
6363 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6364 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6365 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6366 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6367 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6368 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6369 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6370 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6371 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6372 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6373 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6374 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6375
6376 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6377 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6378 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6379 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6380
6381 /* retab / ret */
6382# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6383 if (1)
6384 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6385 else
6386# endif
6387 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6388
6389#else
6390# error "port me"
6391#endif
6392 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6393
6394 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6395}
6396
6397
6398/**
6399 * Emits a standard prolog.
6400 */
6401static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6402{
6403#ifdef RT_ARCH_AMD64
6404 /*
6405 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6406 * reserving 64 bytes for stack variables plus 4 non-register argument
6407  * slots. Fixed register assignment: xBX = pVCpu;
6408 *
6409 * Since we always do the same register spilling, we can use the same
6410 * unwind description for all the code.
6411 */
6412 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6413 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6414 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6415 pbCodeBuf[off++] = 0x8b;
6416 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6417 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6418 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6419# ifdef RT_OS_WINDOWS
6420 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6421 pbCodeBuf[off++] = 0x8b;
6422 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6423 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6424 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6425# else
6426 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6427 pbCodeBuf[off++] = 0x8b;
6428 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6429# endif
6430 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6431 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6432 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6433 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6434 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6435 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6436 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6437 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6438
6439# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6440 /* Save the frame pointer. */
6441 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6442# endif
6443
6444 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6445 X86_GREG_xSP,
6446 IEMNATIVE_FRAME_ALIGN_SIZE
6447 + IEMNATIVE_FRAME_VAR_SIZE
6448 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6449 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6450 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6451 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6452 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6453
6454#elif RT_ARCH_ARM64
6455 /*
6456 * We set up a stack frame exactly like on x86, only we have to push the
6457  * return address ourselves here. We save all non-volatile registers.
6458 */
6459 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6460
6461 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we have been unable
6462                       * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6463                       * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6464                       * in any way conditional, so we just emit this instruction now and hope for the best... */
6465 /* pacibsp */
6466 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6467# endif
6468
6469 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6470 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6471 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6472 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6473 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6474 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6475 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6476 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6477 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6478 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6479 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6480 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6481 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6482 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6483 /* Save the BP and LR (ret address) registers at the top of the frame. */
6484 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6485 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6486 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6487 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6488 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6489 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6490
6491 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6492 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6493
6494 /* mov r28, r0 */
6495 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6496 /* mov r27, r1 */
6497 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6498
6499# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6500 /* Save the frame pointer. */
6501 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6502 ARMV8_A64_REG_X2);
6503# endif
6504
6505#else
6506# error "port me"
6507#endif
6508 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6509 return off;
6510}
6511
6512
6513/*********************************************************************************************************************************
6514* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6515*********************************************************************************************************************************/
6516
6517/**
6518 * Internal work that allocates a variable with kind set to
6519 * kIemNativeVarKind_Invalid and no current stack allocation.
6520 *
6521 * The kind will either be set by the caller or later when the variable is first
6522 * assigned a value.
6523 *
6524 * @returns Unpacked index.
6525 * @internal
6526 */
6527static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6528{
6529 Assert(cbType > 0 && cbType <= 64);
6530 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6531 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6532 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6533 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6534 pReNative->Core.aVars[idxVar].cbVar = cbType;
6535 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6536 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6537 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6538 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6539 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6540 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6541 pReNative->Core.aVars[idxVar].u.uValue = 0;
6542#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6543 pReNative->Core.aVars[idxVar].fSimdReg = false;
6544#endif
6545 return idxVar;
6546}
6547
6548
6549/**
6550 * Internal work that allocates an argument variable w/o setting enmKind.
6551 *
6552 * @returns Unpacked index.
6553 * @internal
6554 */
6555static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6556{
6557 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6558 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6559 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6560
6561 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6562 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6563 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6564 return idxVar;
6565}
6566
6567
6568/**
6569 * Gets the stack slot for a stack variable, allocating one if necessary.
6570 *
6571 * Calling this function implies that the stack slot will contain a valid
6572 * variable value. The caller deals with any register currently assigned to the
6573 * variable, typically by spilling it into the stack slot.
6574 *
6575 * @returns The stack slot number.
6576 * @param pReNative The recompiler state.
6577 * @param idxVar The variable.
6578 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6579 */
6580DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6581{
6582 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6583 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6584 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6585
6586 /* Already got a slot? */
6587 uint8_t const idxStackSlot = pVar->idxStackSlot;
6588 if (idxStackSlot != UINT8_MAX)
6589 {
6590 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6591 return idxStackSlot;
6592 }
6593
6594 /*
6595 * A single slot is easy to allocate.
6596 * Allocate them from the top end, closest to BP, to reduce the displacement.
6597 */
6598 if (pVar->cbVar <= sizeof(uint64_t))
6599 {
6600 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6601 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6602 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6603 pVar->idxStackSlot = (uint8_t)iSlot;
6604 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6605 return (uint8_t)iSlot;
6606 }
6607
6608 /*
6609 * We need more than one stack slot.
6610 *
6611 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6612 */
6613 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6614 Assert(pVar->cbVar <= 64);
6615 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6616 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6617 uint32_t bmStack = pReNative->Core.bmStack;
6618 while (bmStack != UINT32_MAX)
6619 {
6620 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6621 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6622 iSlot = (iSlot - 1) & ~fBitAlignMask;
6623 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6624 {
6625 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6626 pVar->idxStackSlot = (uint8_t)iSlot;
6627 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6628 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6629 return (uint8_t)iSlot;
6630 }
6631
6632 bmStack |= (fBitAllocMask << iSlot);
6633 }
6634 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6635}
6636
6637
6638/**
6639 * Changes the variable to a stack variable.
6640 *
6641  * Currently this is only possible to do the first time the variable is used;
6642  * switching later can be implemented but hasn't been done.
6643 *
6644 * @param pReNative The recompiler state.
6645 * @param idxVar The variable.
6646 * @throws VERR_IEM_VAR_IPE_2
6647 */
6648DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6649{
6650 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6651 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6652 if (pVar->enmKind != kIemNativeVarKind_Stack)
6653 {
6654 /* We could in theory transition from immediate to stack as well, but it
6655 would involve the caller doing work storing the value on the stack. So,
6656 till that's required we only allow transition from invalid. */
6657 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6658 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6659 pVar->enmKind = kIemNativeVarKind_Stack;
6660
6661 /* Note! We don't allocate a stack slot here, that's only done when a
6662 slot is actually needed to hold a variable value. */
6663 }
6664}
6665
6666
6667/**
6668  * Sets the variable to a constant (immediate) value.
6669 *
6670 * This does not require stack storage as we know the value and can always
6671 * reload it, unless of course it's referenced.
6672 *
6673 * @param pReNative The recompiler state.
6674 * @param idxVar The variable.
6675 * @param uValue The immediate value.
6676 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6677 */
6678DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6679{
6680 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6681 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6682 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6683 {
6684 /* Only simple transitions for now. */
6685 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6686 pVar->enmKind = kIemNativeVarKind_Immediate;
6687 }
6688 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6689
6690 pVar->u.uValue = uValue;
6691 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
6692 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
6693 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
6694}
6695
6696
6697/**
6698 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6699 *
6700 * This does not require stack storage as we know the value and can always
6701 * reload it. Loading is postponed till needed.
6702 *
6703 * @param pReNative The recompiler state.
6704 * @param idxVar The variable. Unpacked.
6705 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
6706 *
6707 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6708 * @internal
6709 */
6710static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6711{
6712 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6713 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6714
6715 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6716 {
6717 /* Only simple transitions for now. */
6718 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6719 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6720 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6721 }
6722 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6723
6724 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
6725
6726 /* Update the other variable, ensure it's a stack variable. */
6727 /** @todo handle variables with const values... that'll go boom now. */
6728 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6729 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
6730}
6731
6732
6733/**
6734 * Sets the variable to a reference (pointer) to a guest register reference.
6735 *
6736 * This does not require stack storage as we know the value and can always
6737 * reload it. Loading is postponed till needed.
6738 *
6739 * @param pReNative The recompiler state.
6740 * @param idxVar The variable.
6741  * @param   enmRegClass     The class of guest registers to reference.
6742 * @param idxReg The register within @a enmRegClass to reference.
6743 *
6744 * @throws VERR_IEM_VAR_IPE_2
6745 */
6746DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6747 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6748{
6749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6750 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6751
6752 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
6753 {
6754 /* Only simple transitions for now. */
6755 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6756 pVar->enmKind = kIemNativeVarKind_GstRegRef;
6757 }
6758 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6759
6760 pVar->u.GstRegRef.enmClass = enmRegClass;
6761 pVar->u.GstRegRef.idx = idxReg;
6762}
6763
6764
6765DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6766{
6767 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6768}
6769
6770
6771DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6772{
6773 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6774
6775    /* Since we're using a generic uint64_t value type, we must truncate it if
6776       the variable is smaller, otherwise we may end up with a too large value when
6777       scaling up an imm8 w/ sign-extension.
6778
6779 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6780       in the BIOS, bx=1) when running on ARM, because clang expects 16-bit
6781 register parameters to have bits 16 and up set to zero. Instead of
6782 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
6783 CF value in the result. */
6784 switch (cbType)
6785 {
6786 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6787 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6788 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6789 }
6790 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6791 return idxVar;
6792}
6793
6794
6795DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6796{
6797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
6798 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
6799 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6800 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6801 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6802 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6803
6804 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6805 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
6806 return idxArgVar;
6807}
6808
6809
6810DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6811{
6812 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6813 /* Don't set to stack now, leave that to the first use as for instance
6814 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6815 return idxVar;
6816}
6817
6818
6819DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6820{
6821 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6822
6823    /* Since we're using a generic uint64_t value type, we must truncate it if
6824       the variable is smaller, otherwise we may end up with a too large value when
6825       scaling up an imm8 w/ sign-extension. */
6826 switch (cbType)
6827 {
6828 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6829 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6830 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6831 }
6832 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6833 return idxVar;
6834}
6835
6836
6837DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
6838{
6839 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6840 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6841
6842 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
6843 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
6844
6845 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
6846
6847    /* Truncate the value to this variable's size. */
6848 switch (cbType)
6849 {
6850 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
6851 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
6852 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
6853 }
6854
6855 iemNativeVarRegisterRelease(pReNative, idxVarOther);
6856 iemNativeVarRegisterRelease(pReNative, idxVar);
6857 return idxVar;
6858}
6859
6860
6861/**
6862 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6863 * fixed till we call iemNativeVarRegisterRelease.
6864 *
6865 * @returns The host register number.
6866 * @param pReNative The recompiler state.
6867 * @param idxVar The variable.
6868 * @param poff Pointer to the instruction buffer offset.
6869 * In case a register needs to be freed up or the value
6870 * loaded off the stack.
6871 * @param fInitialized Set if the variable must already have been initialized.
6872 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6873 * the case.
6874 * @param idxRegPref Preferred register number or UINT8_MAX.
6875 */
6876DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6877 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6878{
6879 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6880 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6881 Assert(pVar->cbVar <= 8);
6882 Assert(!pVar->fRegAcquired);
6883
6884 uint8_t idxReg = pVar->idxReg;
6885 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6886 {
6887 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6888 && pVar->enmKind < kIemNativeVarKind_End);
6889 pVar->fRegAcquired = true;
6890 return idxReg;
6891 }
6892
6893 /*
6894 * If the kind of variable has not yet been set, default to 'stack'.
6895 */
6896 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6897 && pVar->enmKind < kIemNativeVarKind_End);
6898 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6899 iemNativeVarSetKindToStack(pReNative, idxVar);
6900
6901 /*
6902     * We have to allocate a register for the variable, even if it's a stack one,
6903     * as we don't know if there are modifications being made to it before it's
6904     * finalized (todo: analyze and insert hints about that?).
6905     *
6906     * If we can, we try to get the correct register for argument variables.  This
6907     * is assuming that most argument variables are fetched as close as possible
6908     * to the actual call, so that there aren't any interfering hidden calls
6909     * (memory accesses, etc) in between.
6910     *
6911     * If we cannot, or if it's a local variable, we make sure no argument registers
6912     * that will be used by this MC block are allocated here, and we always
6913     * prefer non-volatile registers to avoid needing to spill stuff for internal
6914     * calls.
6915 */
6916    /** @todo Detect too early argument value fetches in the python script and warn
6917     *        about hidden calls causing less optimal code to be generated. */
6918
6919 uint8_t const uArgNo = pVar->uArgNo;
6920 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6921 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6922 {
6923 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6924
6925#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6926 /* Writeback any dirty shadow registers we are about to unshadow. */
6927 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
6928#endif
6929
6930 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6931 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6932 }
6933 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6934 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6935 {
6936        /** @todo there must be a better way of doing this, and of handling cArgsX? */
6937 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6938 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6939 & ~pReNative->Core.bmHstRegsWithGstShadow
6940 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6941 & fNotArgsMask;
6942 if (fRegs)
6943 {
6944 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6945 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6946 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6947 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6948 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6949 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6950 }
6951 else
6952 {
6953 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6954 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6955 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6956 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6957 }
6958 }
6959 else
6960 {
6961 idxReg = idxRegPref;
6962 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6963 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
6964 }
6965 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6966 pVar->idxReg = idxReg;
6967
6968#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6969 pVar->fSimdReg = false;
6970#endif
6971
6972 /*
6973 * Load it off the stack if we've got a stack slot.
6974 */
6975 uint8_t const idxStackSlot = pVar->idxStackSlot;
6976 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6977 {
6978 Assert(fInitialized);
6979 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6980 switch (pVar->cbVar)
6981 {
6982 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6983 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6984 case 3: AssertFailed(); RT_FALL_THRU();
6985 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6986 default: AssertFailed(); RT_FALL_THRU();
6987 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6988 }
6989 }
6990 else
6991 {
6992 Assert(idxStackSlot == UINT8_MAX);
6993 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6994 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6995 else
6996 {
6997 /*
6998 * Convert from immediate to stack/register. This is currently only
6999 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7000 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7001 */
7002 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7003 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7004 idxVar, idxReg, pVar->u.uValue));
7005 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7006 pVar->enmKind = kIemNativeVarKind_Stack;
7007 }
7008 }
7009
7010 pVar->fRegAcquired = true;
7011 return idxReg;
7012}
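
/* Illustrative usage sketch (hedged; idxVar and the chosen emitter are placeholders, not
   lifted from a real caller): acquire pins the variable to a host register until the
   matching release, bracketing whatever code is emitted against it:
       uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true);
       off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, UINT64_C(0xffff));
       iemNativeVarRegisterRelease(pReNative, idxVar);
   This mirrors the pattern used by iemNativeVarAllocAssign() above. */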
7013
7014
7015#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7016/**
7017 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7018 * fixed till we call iemNativeVarRegisterRelease.
7019 *
7020 * @returns The host register number.
7021 * @param pReNative The recompiler state.
7022 * @param idxVar The variable.
7023 * @param poff Pointer to the instruction buffer offset.
7024 * In case a register needs to be freed up or the value
7025 * loaded off the stack.
7026 * @param fInitialized Set if the variable must already have been initialized.
7027 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7028 * the case.
7029 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7030 */
7031DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7032 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7033{
7034 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7035 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7036 Assert( pVar->cbVar == sizeof(RTUINT128U)
7037 || pVar->cbVar == sizeof(RTUINT256U));
7038 Assert(!pVar->fRegAcquired);
7039
7040 uint8_t idxReg = pVar->idxReg;
7041 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7042 {
7043 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7044 && pVar->enmKind < kIemNativeVarKind_End);
7045 pVar->fRegAcquired = true;
7046 return idxReg;
7047 }
7048
7049 /*
7050 * If the kind of variable has not yet been set, default to 'stack'.
7051 */
7052 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7053 && pVar->enmKind < kIemNativeVarKind_End);
7054 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7055 iemNativeVarSetKindToStack(pReNative, idxVar);
7056
7057 /*
7058     * We have to allocate a register for the variable, even if it's a stack one,
7059     * as we don't know if there are modifications being made to it before it's
7060     * finalized (todo: analyze and insert hints about that?).
7061     *
7062     * If we can, we try to get the correct register for argument variables.  This
7063     * is assuming that most argument variables are fetched as close as possible
7064     * to the actual call, so that there aren't any interfering hidden calls
7065     * (memory accesses, etc) in between.
7066     *
7067     * If we cannot, or if it's a local variable, we make sure no argument registers
7068     * that will be used by this MC block are allocated here, and we always
7069     * prefer non-volatile registers to avoid needing to spill stuff for internal
7070     * calls.
7071 */
7072    /** @todo Detect too early argument value fetches in the python script and warn
7073     *        about hidden calls causing less optimal code to be generated. */
7074
7075 uint8_t const uArgNo = pVar->uArgNo;
7076 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7077
7078    /* SIMD is a bit simpler for now because there is no support for arguments. */
7079 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7080 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7081 {
7082 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7083 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7084 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7085 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7086 & fNotArgsMask;
7087 if (fRegs)
7088 {
7089 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7090 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7091 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7092 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7093 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7094 }
7095 else
7096 {
7097 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7098 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7099 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7100 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7101 }
7102 }
7103 else
7104 {
7105 idxReg = idxRegPref;
7106 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7107 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7108 }
7109 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7110
7111 pVar->fSimdReg = true;
7112 pVar->idxReg = idxReg;
7113
7114 /*
7115 * Load it off the stack if we've got a stack slot.
7116 */
7117 uint8_t const idxStackSlot = pVar->idxStackSlot;
7118 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7119 {
7120 Assert(fInitialized);
7121 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7122 switch (pVar->cbVar)
7123 {
7124 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7125 default: AssertFailed(); RT_FALL_THRU();
7126 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7127 }
7128 }
7129 else
7130 {
7131 Assert(idxStackSlot == UINT8_MAX);
7132 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7133 }
7134 pVar->fRegAcquired = true;
7135 return idxReg;
7136}
7137#endif
7138
7139
7140/**
7141 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7142 * guest register.
7143 *
7144 * This function makes sure there is a register for it and sets it to be the
7145 * current shadow copy of @a enmGstReg.
7146 *
7147 * @returns The host register number.
7148 * @param pReNative The recompiler state.
7149 * @param idxVar The variable.
7150 * @param enmGstReg The guest register this variable will be written to
7151 * after this call.
7152 * @param poff Pointer to the instruction buffer offset.
7153 * In case a register needs to be freed up or if the
7154 * variable content needs to be loaded off the stack.
7155 *
7156 * @note       We DO NOT expect @a idxVar to be an argument variable, because
7157 *             this function is only used in the commit stage of an
7158 *             instruction.
7159 */
7160DECL_HIDDEN_THROW(uint8_t)
7161iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7162{
7163 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7164 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7165 Assert(!pVar->fRegAcquired);
7166 AssertMsgStmt( pVar->cbVar <= 8
7167 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7168 || pVar->enmKind == kIemNativeVarKind_Stack),
7169 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7170 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7171 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7172
7173 /*
7174 * This shouldn't ever be used for arguments, unless it's in a weird else
7175 * branch that doesn't do any calling and even then it's questionable.
7176 *
7177 * However, in case someone writes crazy wrong MC code and does register
7178 * updates before making calls, just use the regular register allocator to
7179 * ensure we get a register suitable for the intended argument number.
7180 */
7181 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7182
7183 /*
7184 * If there is already a register for the variable, we transfer/set the
7185 * guest shadow copy assignment to it.
7186 */
7187 uint8_t idxReg = pVar->idxReg;
7188 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7189 {
7190#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7191 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7192 {
7193# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7194 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7195 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7196# endif
7197 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7198 }
7199#endif
7200
7201 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7202 {
7203 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7204 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7205 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7206 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7207 }
7208 else
7209 {
7210 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7211 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7212 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7213 }
7214 /** @todo figure this one out. We need some way of making sure the register isn't
7215 * modified after this point, just in case we start writing crappy MC code. */
7216 pVar->enmGstReg = enmGstReg;
7217 pVar->fRegAcquired = true;
7218 return idxReg;
7219 }
7220 Assert(pVar->uArgNo == UINT8_MAX);
7221
7222 /*
7223     * Because this is supposed to be the commit stage, we just tag along with the
7224     * temporary register allocator and upgrade it to a variable register.
7225 */
7226 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7227 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7228 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7229 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7230 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7231 pVar->idxReg = idxReg;
7232
7233 /*
7234 * Now we need to load the register value.
7235 */
7236 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7237 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7238 else
7239 {
7240 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7241 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7242 switch (pVar->cbVar)
7243 {
7244 case sizeof(uint64_t):
7245 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7246 break;
7247 case sizeof(uint32_t):
7248 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7249 break;
7250 case sizeof(uint16_t):
7251 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7252 break;
7253 case sizeof(uint8_t):
7254 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7255 break;
7256 default:
7257 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7258 }
7259 }
7260
7261 pVar->fRegAcquired = true;
7262 return idxReg;
7263}
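
/* Illustrative sketch (hedged; the guest register, source register and variable index are
   placeholders, not taken from a real caller): a commit-stage emitter would acquire the
   variable as the new shadow of the guest register it is about to overwrite, emit the
   final value into the returned host register, and release it again:
       uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar,
                                                                        kIemNativeGstReg_GprFirst, &off);
       off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxSrcReg);
       iemNativeVarRegisterRelease(pReNative, idxVar);
   The caller (or the delayed write-back machinery, when enabled) then takes care of
   committing the shadow to CPUMCTX. */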
7264
7265
7266/**
7267 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7268 *
7269 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7270 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7271 * requirement of flushing anything in volatile host registers when making a
7272 * call.
7273 *
7274 * @returns New @a off value.
7275 * @param pReNative The recompiler state.
7276 * @param off The code buffer position.
7277 * @param fHstRegsNotToSave Set of registers not to save & restore.
7278 */
7279DECL_HIDDEN_THROW(uint32_t)
7280iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7281{
7282 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7283 if (fHstRegs)
7284 {
7285 do
7286 {
7287 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7288 fHstRegs &= ~RT_BIT_32(idxHstReg);
7289
7290 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7291 {
7292 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7293 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7294 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7295 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7296 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7297 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7298 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7299 {
7300 case kIemNativeVarKind_Stack:
7301 {
7302 /* Temporarily spill the variable register. */
7303 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7304 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7305 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7306 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7307 continue;
7308 }
7309
7310 case kIemNativeVarKind_Immediate:
7311 case kIemNativeVarKind_VarRef:
7312 case kIemNativeVarKind_GstRegRef:
7313 /* It is weird to have any of these loaded at this point. */
7314 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7315 continue;
7316
7317 case kIemNativeVarKind_End:
7318 case kIemNativeVarKind_Invalid:
7319 break;
7320 }
7321 AssertFailed();
7322 }
7323 else
7324 {
7325 /*
7326 * Allocate a temporary stack slot and spill the register to it.
7327 */
7328 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7329 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7330 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7331 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7332 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7333 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7334 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7335 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7336 }
7337 } while (fHstRegs);
7338 }
7339#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7340
7341 /*
7342     * Guest register shadows are flushed to CPUMCTX at the moment, so they don't need a stack slot
7343     * allocated; that would also be more difficult because they span multiple stack slots and come
7344     * in different sizes (besides, we only have a limited number of slots at the moment).
7345     *
7346     * However, the shadows need to be flushed out as the guest SIMD register might get corrupted by
7347     * the callee.  This asserts that the registers were written back earlier and are not in the dirty state.
7348 */
7349 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7350
7351 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7352 if (fHstRegs)
7353 {
7354 do
7355 {
7356 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7357 fHstRegs &= ~RT_BIT_32(idxHstReg);
7358
7359 /* Fixed reserved and temporary registers don't need saving. */
7360 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7361 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7362 continue;
7363
7364 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7365
7366 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7367 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7368 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7369 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7370 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7371 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7372 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7373 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7374 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7375 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7376 {
7377 case kIemNativeVarKind_Stack:
7378 {
7379 /* Temporarily spill the variable register. */
7380 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7381 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7382 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7383 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7384 if (cbVar == sizeof(RTUINT128U))
7385 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7386 else
7387 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7388 continue;
7389 }
7390
7391 case kIemNativeVarKind_Immediate:
7392 case kIemNativeVarKind_VarRef:
7393 case kIemNativeVarKind_GstRegRef:
7394 /* It is weird to have any of these loaded at this point. */
7395 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7396 continue;
7397
7398 case kIemNativeVarKind_End:
7399 case kIemNativeVarKind_Invalid:
7400 break;
7401 }
7402 AssertFailed();
7403 } while (fHstRegs);
7404 }
7405#endif
7406 return off;
7407}
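
/* Illustrative sketch of the intended pairing (hedged; pfnHelper is a placeholder and
   iemNativeEmitCallImm is assumed to be the generic call emitter used elsewhere in this
   recompiler): volatile registers holding variables are saved across the helper call and
   restored right after it, instead of being flushed outright:
       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
       off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
   fHstRegsNotToSave would typically cover the registers carrying the helper arguments. */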
7408
7409
7410/**
7411 * Emit code to restore volatile registers after a call to a helper.
7412 *
7413 * @returns New @a off value.
7414 * @param pReNative The recompiler state.
7415 * @param off The code buffer position.
7416 * @param fHstRegsNotToSave Set of registers not to save & restore.
7417 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7418 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7419 */
7420DECL_HIDDEN_THROW(uint32_t)
7421iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7422{
7423 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7424 if (fHstRegs)
7425 {
7426 do
7427 {
7428 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7429 fHstRegs &= ~RT_BIT_32(idxHstReg);
7430
7431 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7432 {
7433 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7434 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7435 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7436 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7437 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7438 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7439 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7440 {
7441 case kIemNativeVarKind_Stack:
7442 {
7443 /* Unspill the variable register. */
7444 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7445 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7446 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7447 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7448 continue;
7449 }
7450
7451 case kIemNativeVarKind_Immediate:
7452 case kIemNativeVarKind_VarRef:
7453 case kIemNativeVarKind_GstRegRef:
7454 /* It is weird to have any of these loaded at this point. */
7455 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7456 continue;
7457
7458 case kIemNativeVarKind_End:
7459 case kIemNativeVarKind_Invalid:
7460 break;
7461 }
7462 AssertFailed();
7463 }
7464 else
7465 {
7466 /*
7467 * Restore from temporary stack slot.
7468 */
7469 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7470 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7471 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7472 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7473
7474 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7475 }
7476 } while (fHstRegs);
7477 }
7478#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7479 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7480 if (fHstRegs)
7481 {
7482 do
7483 {
7484 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7485 fHstRegs &= ~RT_BIT_32(idxHstReg);
7486
7487 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7488 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7489 continue;
7490 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7491
7492 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7493 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7494 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7495 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7496 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7497 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7498 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7499 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7500 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7501 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7502 {
7503 case kIemNativeVarKind_Stack:
7504 {
7505 /* Unspill the variable register. */
7506 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7507 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7508 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7509 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7510
7511 if (cbVar == sizeof(RTUINT128U))
7512 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7513 else
7514 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7515 continue;
7516 }
7517
7518 case kIemNativeVarKind_Immediate:
7519 case kIemNativeVarKind_VarRef:
7520 case kIemNativeVarKind_GstRegRef:
7521 /* It is weird to have any of these loaded at this point. */
7522 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7523 continue;
7524
7525 case kIemNativeVarKind_End:
7526 case kIemNativeVarKind_Invalid:
7527 break;
7528 }
7529 AssertFailed();
7530 } while (fHstRegs);
7531 }
7532#endif
7533 return off;
7534}
7535
7536
7537/**
7538 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7539 *
7540 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7541 *
7542 * ASSUMES that @a idxVar is valid and unpacked.
7543 */
7544DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7545{
7546 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7547 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7548 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7549 {
7550 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7551 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7552 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7553 Assert(cSlots > 0);
7554 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7555 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7556 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7557 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7558 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7559 }
7560 else
7561 Assert(idxStackSlot == UINT8_MAX);
7562}
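
/* Worked example of the slot math above (nothing new, just spelling it out): a 32 byte
   variable (sizeof(RTUINT256U)) gives cSlots = (32 + 7) / 8 = 4 and fAllocMask = 0xf,
   so four consecutive bits starting at idxStackSlot are cleared in Core.bmStack. */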
7563
7564
7565/**
7566 * Worker that frees a single variable.
7567 *
7568 * ASSUMES that @a idxVar is valid and unpacked.
7569 */
7570DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7571{
7572 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7573 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7574 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7575
7576 /* Free the host register first if any assigned. */
7577 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7578#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7579 if ( idxHstReg != UINT8_MAX
7580 && pReNative->Core.aVars[idxVar].fSimdReg)
7581 {
7582 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7583 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7584 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7585 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7586 }
7587 else
7588#endif
7589 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7590 {
7591 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7592 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7593 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7594 }
7595
7596 /* Free argument mapping. */
7597 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7598 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7599 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7600
7601 /* Free the stack slots. */
7602 iemNativeVarFreeStackSlots(pReNative, idxVar);
7603
7604 /* Free the actual variable. */
7605 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7606 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7607}
7608
7609
7610/**
7611 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7612 */
7613DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7614{
7615 while (bmVars != 0)
7616 {
7617 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7618 bmVars &= ~RT_BIT_32(idxVar);
7619
7620#if 1 /** @todo optimize by simplifying this later... */
7621 iemNativeVarFreeOneWorker(pReNative, idxVar);
7622#else
7623 /* Only need to free the host register, the rest is done as bulk updates below. */
7624 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7625 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7626 {
7627 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7628 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7629 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7630 }
7631#endif
7632 }
7633#if 0 /** @todo optimize by simplifying this later... */
7634 pReNative->Core.bmVars = 0;
7635 pReNative->Core.bmStack = 0;
7636 pReNative->Core.u64ArgVars = UINT64_MAX;
7637#endif
7638}
7639
7640
7641
7642/*********************************************************************************************************************************
7643* Emitters for IEM_MC_CALL_CIMPL_XXX *
7644*********************************************************************************************************************************/
7645
7646/**
7647 * Emits code to load a reference to the given guest register into @a idxGprDst.
7648 */
7649DECL_HIDDEN_THROW(uint32_t)
7650iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7651 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7652{
7653#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7654 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
7655#endif
7656
7657 /*
7658 * Get the offset relative to the CPUMCTX structure.
7659 */
7660 uint32_t offCpumCtx;
7661 switch (enmClass)
7662 {
7663 case kIemNativeGstRegRef_Gpr:
7664 Assert(idxRegInClass < 16);
7665 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7666 break;
7667
7668 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7669 Assert(idxRegInClass < 4);
7670 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7671 break;
7672
7673 case kIemNativeGstRegRef_EFlags:
7674 Assert(idxRegInClass == 0);
7675 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7676 break;
7677
7678 case kIemNativeGstRegRef_MxCsr:
7679 Assert(idxRegInClass == 0);
7680 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7681 break;
7682
7683 case kIemNativeGstRegRef_FpuReg:
7684 Assert(idxRegInClass < 8);
7685 AssertFailed(); /** @todo what kind of indexing? */
7686 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7687 break;
7688
7689 case kIemNativeGstRegRef_MReg:
7690 Assert(idxRegInClass < 8);
7691 AssertFailed(); /** @todo what kind of indexing? */
7692 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7693 break;
7694
7695 case kIemNativeGstRegRef_XReg:
7696 Assert(idxRegInClass < 16);
7697 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7698 break;
7699
7700 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7701 Assert(idxRegInClass == 0);
7702 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7703 break;
7704
7705 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7706 Assert(idxRegInClass == 0);
7707 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7708 break;
7709
7710 default:
7711 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7712 }
7713
7714 /*
7715 * Load the value into the destination register.
7716 */
7717#ifdef RT_ARCH_AMD64
7718 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7719
7720#elif defined(RT_ARCH_ARM64)
7721 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7722 Assert(offCpumCtx < 4096);
7723 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7724
7725#else
7726# error "Port me!"
7727#endif
7728
7729 return off;
7730}
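
/* Illustrative sketch (hedged; the register class and index are placeholders): this is
   how the call emitters below pass a guest register by reference, e.g. loading the
   address of guest GPR 3 (rBX) into the first call argument register:
       off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                            kIemNativeGstRegRef_Gpr, 3);
   matching the kIemNativeVarKind_GstRegRef handling in iemNativeEmitCallCommon(). */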
7731
7732
7733/**
7734 * Common code for CIMPL and AIMPL calls.
7735 *
7736 * These are calls that use argument variables and such.  They should not be
7737 * confused with internal calls required to implement an MC operation,
7738 * like a TLB load and similar.
7739 *
7740 * Upon return all that is left to do is to load any hidden arguments and
7741 * perform the call. All argument variables are freed.
7742 *
7743 * @returns New code buffer offset; throws VBox status code on error.
7744 * @param pReNative The native recompile state.
7745 * @param off The code buffer offset.
7746 * @param   cArgs               The total number of arguments (includes hidden
7747 * count).
7748 * @param cHiddenArgs The number of hidden arguments. The hidden
7749 * arguments must not have any variable declared for
7750 * them, whereas all the regular arguments must
7751 * (tstIEMCheckMc ensures this).
7752 * @param   fFlushPendingWrites Flag whether to flush pending writes (default true).
7753 *                              Pending writes in call-volatile registers are still flushed even when false.
7754 */
7755DECL_HIDDEN_THROW(uint32_t)
7756iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
7757 bool fFlushPendingWrites /*= true*/)
7758{
7759#ifdef VBOX_STRICT
7760 /*
7761 * Assert sanity.
7762 */
7763 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7764 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7765 for (unsigned i = 0; i < cHiddenArgs; i++)
7766 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7767 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7768 {
7769 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7770 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7771 }
7772 iemNativeRegAssertSanity(pReNative);
7773#endif
7774
7775 /* We don't know what the called function makes use of, so flush any pending register writes. */
7776 RT_NOREF(fFlushPendingWrites);
7777#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7778 if (fFlushPendingWrites)
7779#endif
7780 off = iemNativeRegFlushPendingWrites(pReNative, off);
7781
7782 /*
7783 * Before we do anything else, go over variables that are referenced and
7784 * make sure they are not in a register.
7785 */
7786 uint32_t bmVars = pReNative->Core.bmVars;
7787 if (bmVars)
7788 {
7789 do
7790 {
7791 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7792 bmVars &= ~RT_BIT_32(idxVar);
7793
7794 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7795 {
7796 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7797#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7798 if ( idxRegOld != UINT8_MAX
7799 && pReNative->Core.aVars[idxVar].fSimdReg)
7800 {
7801 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7802 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
7803
7804 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7805 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7806 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7807 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7808 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
7809 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7810 else
7811 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7812
7813 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
7814 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
7815
7816 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7817 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
7818 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7819 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
7820 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
7821 }
7822 else
7823#endif
7824 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7825 {
7826 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7827 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7828 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7829 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7830 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7831
7832 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7833 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7834 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7835 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7836 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7837 }
7838 }
7839 } while (bmVars != 0);
7840#if 0 //def VBOX_STRICT
7841 iemNativeRegAssertSanity(pReNative);
7842#endif
7843 }
7844
7845 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7846
7847#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7848 /*
7849     * As the very first step, go over the host registers that will be used for arguments
7850     * and make sure they don't shadow anything which needs writing back first.
7851 */
7852 for (uint32_t i = 0; i < cRegArgs; i++)
7853 {
7854 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7855
7856 /* Writeback any dirty guest shadows before using this register. */
7857 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
7858 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
7859 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
7860 }
7861#endif
7862
7863 /*
7864 * First, go over the host registers that will be used for arguments and make
7865 * sure they either hold the desired argument or are free.
7866 */
7867 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7868 {
7869 for (uint32_t i = 0; i < cRegArgs; i++)
7870 {
7871 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7872 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7873 {
7874 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7875 {
7876 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7877 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7878 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7879 Assert(pVar->idxReg == idxArgReg);
7880 uint8_t const uArgNo = pVar->uArgNo;
7881 if (uArgNo == i)
7882                    { /* perfect */ }
7883 /* The variable allocator logic should make sure this is impossible,
7884 except for when the return register is used as a parameter (ARM,
7885 but not x86). */
7886#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7887 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7888 {
7889# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7890# error "Implement this"
7891# endif
7892 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7893 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7894 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7895 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7896 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7897 }
7898#endif
7899 else
7900 {
7901 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7902
7903 if (pVar->enmKind == kIemNativeVarKind_Stack)
7904 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7905 else
7906 {
7907 /* just free it, can be reloaded if used again */
7908 pVar->idxReg = UINT8_MAX;
7909 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7910 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7911 }
7912 }
7913 }
7914 else
7915 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7916 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7917 }
7918 }
7919#if 0 //def VBOX_STRICT
7920 iemNativeRegAssertSanity(pReNative);
7921#endif
7922 }
7923
7924 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7925
7926#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7927 /*
7928 * If there are any stack arguments, make sure they are in their place as well.
7929 *
7930     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7931     * the caller) will be loading it later and it must be free (see the first loop).
7932 */
7933 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7934 {
7935 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7936 {
7937 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7938 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7939 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7940 {
7941 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7942 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7943 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7944 pVar->idxReg = UINT8_MAX;
7945 }
7946 else
7947 {
7948 /* Use ARG0 as temp for stuff we need registers for. */
7949 switch (pVar->enmKind)
7950 {
7951 case kIemNativeVarKind_Stack:
7952 {
7953 uint8_t const idxStackSlot = pVar->idxStackSlot;
7954 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7955 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7956 iemNativeStackCalcBpDisp(idxStackSlot));
7957 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7958 continue;
7959 }
7960
7961 case kIemNativeVarKind_Immediate:
7962 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7963 continue;
7964
7965 case kIemNativeVarKind_VarRef:
7966 {
7967 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7968 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7969 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7970 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7971 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7972# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7973 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
7974 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
7975 if ( fSimdReg
7976 && idxRegOther != UINT8_MAX)
7977 {
7978 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7979 if (cbVar == sizeof(RTUINT128U))
7980 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
7981 else
7982 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
7983 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7984 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7985 }
7986 else
7987# endif
7988 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7989 {
7990 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7991 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7992 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7993 }
7994 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7995 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7996 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7997 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7998 continue;
7999 }
8000
8001 case kIemNativeVarKind_GstRegRef:
8002 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8003 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8004 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8005 continue;
8006
8007 case kIemNativeVarKind_Invalid:
8008 case kIemNativeVarKind_End:
8009 break;
8010 }
8011 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8012 }
8013 }
8014# if 0 //def VBOX_STRICT
8015 iemNativeRegAssertSanity(pReNative);
8016# endif
8017 }
8018#else
8019 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8020#endif
8021
8022 /*
8023 * Make sure the argument variables are loaded into their respective registers.
8024 *
8025 * We can optimize this by ASSUMING that any register allocations are for
8026     * registers that have already been loaded and are ready.  The previous step
8027 * saw to that.
8028 */
8029 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8030 {
8031 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8032 {
8033 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8034 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8035 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8036 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8037 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8038 else
8039 {
8040 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8041 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8042 {
8043 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8044 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8045 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8046 | RT_BIT_32(idxArgReg);
8047 pVar->idxReg = idxArgReg;
8048 }
8049 else
8050 {
8051 /* Use ARG0 as temp for stuff we need registers for. */
8052 switch (pVar->enmKind)
8053 {
8054 case kIemNativeVarKind_Stack:
8055 {
8056 uint8_t const idxStackSlot = pVar->idxStackSlot;
8057 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8058 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8059 continue;
8060 }
8061
8062 case kIemNativeVarKind_Immediate:
8063 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8064 continue;
8065
8066 case kIemNativeVarKind_VarRef:
8067 {
8068 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8069 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8070 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8071 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8072 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8073 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8074#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8075 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8076 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8077 if ( fSimdReg
8078 && idxRegOther != UINT8_MAX)
8079 {
8080 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8081 if (cbVar == sizeof(RTUINT128U))
8082 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8083 else
8084 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8085 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8086 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8087 }
8088 else
8089#endif
8090 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8091 {
8092 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8093 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8094 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8095 }
8096 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8097 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8098 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8099 continue;
8100 }
8101
8102 case kIemNativeVarKind_GstRegRef:
8103 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8104 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8105 continue;
8106
8107 case kIemNativeVarKind_Invalid:
8108 case kIemNativeVarKind_End:
8109 break;
8110 }
8111 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8112 }
8113 }
8114 }
8115#if 0 //def VBOX_STRICT
8116 iemNativeRegAssertSanity(pReNative);
8117#endif
8118 }
8119#ifdef VBOX_STRICT
8120 else
8121 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8122 {
8123 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8124 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8125 }
8126#endif
8127
8128 /*
8129 * Free all argument variables (simplified).
8130 * Their lifetime always expires with the call they are for.
8131 */
8132 /** @todo Make the python script check that arguments aren't used after
8133 * IEM_MC_CALL_XXXX. */
8134    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8135     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8136     *        an argument value.  There is also some FPU stuff. */
8137 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8138 {
8139 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8140 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8141
8142 /* no need to free registers: */
8143 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8144 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8145 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8146 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8147 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8148 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8149
8150 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8151 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8152 iemNativeVarFreeStackSlots(pReNative, idxVar);
8153 }
8154 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8155
8156 /*
8157 * Flush volatile registers as we make the call.
8158 */
8159 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8160
8161 return off;
8162}
8163
8164
8165
8166/*********************************************************************************************************************************
8167* TLB Lookup. *
8168*********************************************************************************************************************************/
8169
8170/**
8171 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup to double-check a native
8171 * TLB lookup result against a plain C re-run of the same lookup.
8172 */
8173DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8174{
8175 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8176 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8177 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8178 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8179
8180 /* Do the lookup manually. */
8181 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8182 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8183 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8184 if (RT_LIKELY(pTlbe->uTag == uTag))
8185 {
8186 /*
8187 * Check TLB page table level access flags.
8188 */
8189 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
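     /* With IEMTLBE_F_PT_NO_USER being bit 2, (CPL + 1) & 4 is non-zero only for CPL 3,
        so the page-table NO_USER bit is only treated as a violation for ring-3 accesses. */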
8190 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8191 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8192 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8193 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8194 | IEMTLBE_F_PG_UNASSIGNED
8195 | IEMTLBE_F_PT_NO_ACCESSED
8196 | fNoWriteNoDirty | fNoUser);
8197 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8198 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8199 {
8200 /*
8201 * Compute the host mapping address and compare it with the native lookup result.
8202 */
8203 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8204 if ((uintptr_t)pbAddr == uResult)
8205 return;
8206 RT_NOREF(cbMem);
8207 AssertFailed();
8208 }
8209 else
8210 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8211 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8212 }
8213 else
8214 AssertFailed();
8215 RT_BREAKPOINT();
8216}
8217
8218/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8219
8220
8221
8222/*********************************************************************************************************************************
8223* Recompiler Core. *
8224*********************************************************************************************************************************/
8225
8226/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8227static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8228{
8229 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8230 pDis->cbCachedInstr += cbMaxRead;
8231 RT_NOREF(cbMinRead);
8232 return VERR_NO_DATA;
8233}
8234
8235
8236DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8237{
8238 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8239 {
8240#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8241 ENTRY(fLocalForcedActions),
8242 ENTRY(iem.s.rcPassUp),
8243 ENTRY(iem.s.fExec),
8244 ENTRY(iem.s.pbInstrBuf),
8245 ENTRY(iem.s.uInstrBufPc),
8246 ENTRY(iem.s.GCPhysInstrBuf),
8247 ENTRY(iem.s.cbInstrBufTotal),
8248 ENTRY(iem.s.idxTbCurInstr),
8249#ifdef VBOX_WITH_STATISTICS
8250 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8251 ENTRY(iem.s.StatNativeTlbHitsForStore),
8252 ENTRY(iem.s.StatNativeTlbHitsForStack),
8253 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8254 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8255 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8256 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8257 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8258#endif
8259 ENTRY(iem.s.DataTlb.uTlbRevision),
8260 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8261 ENTRY(iem.s.DataTlb.cTlbHits),
8262 ENTRY(iem.s.DataTlb.aEntries),
8263 ENTRY(iem.s.CodeTlb.uTlbRevision),
8264 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8265 ENTRY(iem.s.CodeTlb.cTlbHits),
8266 ENTRY(iem.s.CodeTlb.aEntries),
8267 ENTRY(pVMR3),
8268 ENTRY(cpum.GstCtx.rax),
8269 ENTRY(cpum.GstCtx.ah),
8270 ENTRY(cpum.GstCtx.rcx),
8271 ENTRY(cpum.GstCtx.ch),
8272 ENTRY(cpum.GstCtx.rdx),
8273 ENTRY(cpum.GstCtx.dh),
8274 ENTRY(cpum.GstCtx.rbx),
8275 ENTRY(cpum.GstCtx.bh),
8276 ENTRY(cpum.GstCtx.rsp),
8277 ENTRY(cpum.GstCtx.rbp),
8278 ENTRY(cpum.GstCtx.rsi),
8279 ENTRY(cpum.GstCtx.rdi),
8280 ENTRY(cpum.GstCtx.r8),
8281 ENTRY(cpum.GstCtx.r9),
8282 ENTRY(cpum.GstCtx.r10),
8283 ENTRY(cpum.GstCtx.r11),
8284 ENTRY(cpum.GstCtx.r12),
8285 ENTRY(cpum.GstCtx.r13),
8286 ENTRY(cpum.GstCtx.r14),
8287 ENTRY(cpum.GstCtx.r15),
8288 ENTRY(cpum.GstCtx.es.Sel),
8289 ENTRY(cpum.GstCtx.es.u64Base),
8290 ENTRY(cpum.GstCtx.es.u32Limit),
8291 ENTRY(cpum.GstCtx.es.Attr),
8292 ENTRY(cpum.GstCtx.cs.Sel),
8293 ENTRY(cpum.GstCtx.cs.u64Base),
8294 ENTRY(cpum.GstCtx.cs.u32Limit),
8295 ENTRY(cpum.GstCtx.cs.Attr),
8296 ENTRY(cpum.GstCtx.ss.Sel),
8297 ENTRY(cpum.GstCtx.ss.u64Base),
8298 ENTRY(cpum.GstCtx.ss.u32Limit),
8299 ENTRY(cpum.GstCtx.ss.Attr),
8300 ENTRY(cpum.GstCtx.ds.Sel),
8301 ENTRY(cpum.GstCtx.ds.u64Base),
8302 ENTRY(cpum.GstCtx.ds.u32Limit),
8303 ENTRY(cpum.GstCtx.ds.Attr),
8304 ENTRY(cpum.GstCtx.fs.Sel),
8305 ENTRY(cpum.GstCtx.fs.u64Base),
8306 ENTRY(cpum.GstCtx.fs.u32Limit),
8307 ENTRY(cpum.GstCtx.fs.Attr),
8308 ENTRY(cpum.GstCtx.gs.Sel),
8309 ENTRY(cpum.GstCtx.gs.u64Base),
8310 ENTRY(cpum.GstCtx.gs.u32Limit),
8311 ENTRY(cpum.GstCtx.gs.Attr),
8312 ENTRY(cpum.GstCtx.rip),
8313 ENTRY(cpum.GstCtx.eflags),
8314 ENTRY(cpum.GstCtx.uRipInhibitInt),
8315 ENTRY(cpum.GstCtx.cr0),
8316 ENTRY(cpum.GstCtx.cr4),
8317 ENTRY(cpum.GstCtx.aXcr[0]),
8318 ENTRY(cpum.GstCtx.aXcr[1]),
8319#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8320 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8321 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8322 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8323 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8324 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8325 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8326 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8327 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8328 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8329 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8330 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8331 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8332 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8333 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8334 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8335 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8336 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8337 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8338 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8339 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8340 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8341 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8342 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8343 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8344 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8345 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8346 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8347 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8348 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8349 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8350 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8351 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8352 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8353#endif
8354#undef ENTRY
8355 };
8356#ifdef VBOX_STRICT
8357 static bool s_fOrderChecked = false;
8358 if (!s_fOrderChecked)
8359 {
8360 s_fOrderChecked = true;
8361 uint32_t offPrev = s_aMembers[0].off;
8362 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8363 {
8364 Assert(s_aMembers[i].off > offPrev);
8365 offPrev = s_aMembers[i].off;
8366 }
8367 }
8368#endif
8369
8370 /*
8371 * Binary lookup.
8372 */
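     /* Half-interval search over the s_aMembers table (ascending order, asserted above in
        strict builds): iStart is inclusive, iEnd exclusive, and the window shrinks until an
        exact offset match is found or it cannot be narrowed any further. */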
8373 unsigned iStart = 0;
8374 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8375 for (;;)
8376 {
8377 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8378 uint32_t const offCur = s_aMembers[iCur].off;
8379 if (off < offCur)
8380 {
8381 if (iCur != iStart)
8382 iEnd = iCur;
8383 else
8384 break;
8385 }
8386 else if (off > offCur)
8387 {
8388 if (iCur + 1 < iEnd)
8389 iStart = iCur + 1;
8390 else
8391 break;
8392 }
8393 else
8394 return s_aMembers[iCur].pszName;
8395 }
8396#ifdef VBOX_WITH_STATISTICS
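     /* Not in the table: see whether the offset lands inside the per-threaded-function
        statistics array; the unsigned subtraction lets a single compare cover both bounds. */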
8397 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8398 return "iem.s.acThreadedFuncStats[iFn]";
8399#endif
8400 return NULL;
8401}
8402
8403
8404DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8405{
8406 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8407#if defined(RT_ARCH_AMD64)
8408 static const char * const a_apszMarkers[] =
8409 {
8410 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8411 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8412 };
8413#endif
8414
8415 char szDisBuf[512];
8416 DISSTATE Dis;
8417 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8418 uint32_t const cNative = pTb->Native.cInstructions;
8419 uint32_t offNative = 0;
8420#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8421 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8422#endif
8423 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8424 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8425 : DISCPUMODE_64BIT;
8426#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8427 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8428#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8429 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8430#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8431# error "Port me"
8432#else
8433 csh hDisasm = ~(size_t)0;
8434# if defined(RT_ARCH_AMD64)
8435 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8436# elif defined(RT_ARCH_ARM64)
8437 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8438# else
8439# error "Port me"
8440# endif
8441 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8442
8443 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8444 //Assert(rcCs == CS_ERR_OK);
8445#endif
8446
8447 /*
8448 * Print TB info.
8449 */
8450 pHlp->pfnPrintf(pHlp,
8451 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8452 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8453 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8454 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8455#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8456 if (pDbgInfo && pDbgInfo->cEntries > 1)
8457 {
8458 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8459
8460 /*
8461 * This disassembly is driven by the debug info which follows the native
8462 * code and indicates where the next guest instruction starts, where the
8463 * labels are, and other such things.
8464 */
8465 uint32_t idxThreadedCall = 0;
8466 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8467 uint8_t idxRange = UINT8_MAX;
8468 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8469 uint32_t offRange = 0;
8470 uint32_t offOpcodes = 0;
8471 uint32_t const cbOpcodes = pTb->cbOpcodes;
8472 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8473 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8474 uint32_t iDbgEntry = 1;
8475 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
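     /* The first debug entry is always a NativeOffset record (asserted above); each such
        record tells us how much native code to disassemble before processing the next
        batch of debug entries. */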
8476
8477 while (offNative < cNative)
8478 {
8479 /* If we're at or have passed the point where the next chunk of debug
8480 info starts, process it. */
8481 if (offDbgNativeNext <= offNative)
8482 {
8483 offDbgNativeNext = UINT32_MAX;
8484 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8485 {
8486 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8487 {
8488 case kIemTbDbgEntryType_GuestInstruction:
8489 {
8490 /* Did the exec flag change? */
8491 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8492 {
8493 pHlp->pfnPrintf(pHlp,
8494 " fExec change %#08x -> %#08x %s\n",
8495 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8496 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8497 szDisBuf, sizeof(szDisBuf)));
8498 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8499 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8500 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8501 : DISCPUMODE_64BIT;
8502 }
8503
8504 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8505 where the compilation was aborted before the opcode was recorded and the actual
8506 instruction was translated to a threaded call. This may happen when we run out
8507 of ranges, or when some complicated interrupts/FFs are found to be pending or
8508 similar. So, we just deal with it here rather than in the compiler code as it
8509 is a lot simpler to do here. */
8510 if ( idxRange == UINT8_MAX
8511 || idxRange >= cRanges
8512 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8513 {
8514 idxRange += 1;
8515 if (idxRange < cRanges)
8516 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8517 else
8518 continue;
8519 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8520 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8521 + (pTb->aRanges[idxRange].idxPhysPage == 0
8522 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8523 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8524 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8525 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8526 pTb->aRanges[idxRange].idxPhysPage);
8527 GCPhysPc += offRange;
8528 }
8529
8530 /* Disassemble the instruction. */
8531 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8532 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8533 uint32_t cbInstr = 1;
8534 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8535 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8536 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8537 if (RT_SUCCESS(rc))
8538 {
8539 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8540 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8541 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8542 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8543
8544 static unsigned const s_offMarker = 55;
8545 static char const s_szMarker[] = " ; <--- guest";
8546 if (cch < s_offMarker)
8547 {
8548 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8549 cch = s_offMarker;
8550 }
8551 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8552 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8553
8554 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8555 }
8556 else
8557 {
8558 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8559 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8560 cbInstr = 1;
8561 }
8562 GCPhysPc += cbInstr;
8563 offOpcodes += cbInstr;
8564 offRange += cbInstr;
8565 continue;
8566 }
8567
8568 case kIemTbDbgEntryType_ThreadedCall:
8569 pHlp->pfnPrintf(pHlp,
8570 " Call #%u to %s (%u args) - %s\n",
8571 idxThreadedCall,
8572 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8573 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8574 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8575 idxThreadedCall++;
8576 continue;
8577
8578 case kIemTbDbgEntryType_GuestRegShadowing:
8579 {
8580 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8581 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8582 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8583 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8584 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8585 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8586 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8587 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8588 else
8589 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8590 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8591 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8592 continue;
8593 }
8594
8595#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8596 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8597 {
8598 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8599 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8600 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8601 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8602 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8603 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8604 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8605 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8606 else
8607 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8608 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8609 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8610 continue;
8611 }
8612#endif
8613
8614 case kIemTbDbgEntryType_Label:
8615 {
8616 const char *pszName = "what_the_fudge";
8617 const char *pszComment = "";
8618 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8619 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8620 {
8621 case kIemNativeLabelType_Return: pszName = "Return"; break;
8622 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8623 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8624 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8625 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8626 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8627 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8628 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8629 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8630 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8631 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8632 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8633 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8634 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8635 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8636 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8637 case kIemNativeLabelType_If:
8638 pszName = "If";
8639 fNumbered = true;
8640 break;
8641 case kIemNativeLabelType_Else:
8642 pszName = "Else";
8643 fNumbered = true;
8644 pszComment = " ; regs state restored pre-if-block";
8645 break;
8646 case kIemNativeLabelType_Endif:
8647 pszName = "Endif";
8648 fNumbered = true;
8649 break;
8650 case kIemNativeLabelType_CheckIrq:
8651 pszName = "CheckIrq_CheckVM";
8652 fNumbered = true;
8653 break;
8654 case kIemNativeLabelType_TlbLookup:
8655 pszName = "TlbLookup";
8656 fNumbered = true;
8657 break;
8658 case kIemNativeLabelType_TlbMiss:
8659 pszName = "TlbMiss";
8660 fNumbered = true;
8661 break;
8662 case kIemNativeLabelType_TlbDone:
8663 pszName = "TlbDone";
8664 fNumbered = true;
8665 break;
8666 case kIemNativeLabelType_Invalid:
8667 case kIemNativeLabelType_End:
8668 break;
8669 }
8670 if (fNumbered)
8671 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8672 else
8673 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8674 continue;
8675 }
8676
8677 case kIemTbDbgEntryType_NativeOffset:
8678 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8679 Assert(offDbgNativeNext >= offNative);
8680 break;
8681
8682#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8683 case kIemTbDbgEntryType_DelayedPcUpdate:
8684 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8685 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8686 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8687 continue;
8688#endif
8689
8690#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8691 case kIemTbDbgEntryType_GuestRegDirty:
8692 {
8693 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8694 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
8695 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
8696 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
8697 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
8698 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
8699 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
8700 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
8701 pszGstReg, pszHstReg);
8702 continue;
8703 }
8704
8705 case kIemTbDbgEntryType_GuestRegWriteback:
8706 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
8707 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
8708 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
8709 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
8710 continue;
8711#endif
8712
8713 default:
8714 AssertFailed();
8715 }
8716 iDbgEntry++;
8717 break;
8718 }
8719 }
8720
8721 /*
8722 * Disassemble the next native instruction.
8723 */
8724 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8725# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8726 uint32_t cbInstr = sizeof(paNative[0]);
8727 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8728 if (RT_SUCCESS(rc))
8729 {
8730# if defined(RT_ARCH_AMD64)
8731 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8732 {
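     /* iemNativeEmitMarker encodes a 32-bit payload in bytes 3..6 of the 7-byte NOP:
        low word = call index (bit 15 = recompiled flag), high word = threaded function
        number; when the high word is no valid function, uInfo (ignoring bit 31) is
        treated as an index into a_apszMarkers. */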
8733 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8734 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8735 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8736 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8737 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8738 uInfo & 0x8000 ? "recompiled" : "todo");
8739 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8740 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8741 else
8742 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8743 }
8744 else
8745# endif
8746 {
8747 const char *pszAnnotation = NULL;
8748# ifdef RT_ARCH_AMD64
8749 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8750 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8751 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8752 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8753 PCDISOPPARAM pMemOp;
8754 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8755 pMemOp = &Dis.Param1;
8756 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8757 pMemOp = &Dis.Param2;
8758 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8759 pMemOp = &Dis.Param3;
8760 else
8761 pMemOp = NULL;
8762 if ( pMemOp
8763 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8764 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8765 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8766 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8767
8768#elif defined(RT_ARCH_ARM64)
8769 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8770 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8771 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8772# else
8773# error "Port me"
8774# endif
8775 if (pszAnnotation)
8776 {
8777 static unsigned const s_offAnnotation = 55;
8778 size_t const cchAnnotation = strlen(pszAnnotation);
8779 size_t cchDis = strlen(szDisBuf);
8780 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8781 {
8782 if (cchDis < s_offAnnotation)
8783 {
8784 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8785 cchDis = s_offAnnotation;
8786 }
8787 szDisBuf[cchDis++] = ' ';
8788 szDisBuf[cchDis++] = ';';
8789 szDisBuf[cchDis++] = ' ';
8790 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8791 }
8792 }
8793 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8794 }
8795 }
8796 else
8797 {
8798# if defined(RT_ARCH_AMD64)
8799 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8800 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8801# elif defined(RT_ARCH_ARM64)
8802 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8803# else
8804# error "Port me"
8805# endif
8806 cbInstr = sizeof(paNative[0]);
8807 }
8808 offNative += cbInstr / sizeof(paNative[0]);
8809
8810# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8811 cs_insn *pInstr;
8812 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8813 (uintptr_t)pNativeCur, 1, &pInstr);
8814 if (cInstrs > 0)
8815 {
8816 Assert(cInstrs == 1);
8817 const char *pszAnnotation = NULL;
8818# if defined(RT_ARCH_ARM64)
8819 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8820 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8821 {
8822 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8823 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8824 char *psz = strchr(pInstr->op_str, '[');
8825 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8826 {
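     /* x28 holds pVCpu and x27 holds &pVCpu->cpum.GstCtx (see the AssertCompile above),
        so for an x27 base the CPUMCTX offset must be added back before the name lookup. */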
8827 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8828 int32_t off = -1;
8829 psz += 4;
8830 if (*psz == ']')
8831 off = 0;
8832 else if (*psz == ',')
8833 {
8834 psz = RTStrStripL(psz + 1);
8835 if (*psz == '#')
8836 off = RTStrToInt32(&psz[1]);
8837 /** @todo deal with index registers and LSL as well... */
8838 }
8839 if (off >= 0)
8840 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8841 }
8842 }
8843# endif
8844
8845 size_t const cchOp = strlen(pInstr->op_str);
8846# if defined(RT_ARCH_AMD64)
8847 if (pszAnnotation)
8848 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8849 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8850 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8851 else
8852 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8853 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8854
8855# else
8856 if (pszAnnotation)
8857 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8858 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8859 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8860 else
8861 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8862 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8863# endif
8864 offNative += pInstr->size / sizeof(*pNativeCur);
8865 cs_free(pInstr, cInstrs);
8866 }
8867 else
8868 {
8869# if defined(RT_ARCH_AMD64)
8870 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8871 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8872# else
8873 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8874# endif
8875 offNative++;
8876 }
8877# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8878 }
8879 }
8880 else
8881#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8882 {
8883 /*
8884 * No debug info, just disassemble the x86 code and then the native code.
8885 *
8886 * First the guest code:
8887 */
8888 for (unsigned i = 0; i < pTb->cRanges; i++)
8889 {
8890 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8891 + (pTb->aRanges[i].idxPhysPage == 0
8892 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8893 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8894 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8895 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8896 unsigned off = pTb->aRanges[i].offOpcodes;
8897 /** @todo this ain't working when crossing pages! */
8898 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8899 while (off < cbOpcodes)
8900 {
8901 uint32_t cbInstr = 1;
8902 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8903 &pTb->pabOpcodes[off], cbOpcodes - off,
8904 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8905 if (RT_SUCCESS(rc))
8906 {
8907 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8908 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8909 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8910 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8911 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8912 GCPhysPc += cbInstr;
8913 off += cbInstr;
8914 }
8915 else
8916 {
8917 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8918 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8919 break;
8920 }
8921 }
8922 }
8923
8924 /*
8925 * Then the native code:
8926 */
8927 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8928 while (offNative < cNative)
8929 {
8930 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8931# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8932 uint32_t cbInstr = sizeof(paNative[0]);
8933 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8934 if (RT_SUCCESS(rc))
8935 {
8936# if defined(RT_ARCH_AMD64)
8937 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8938 {
8939 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8940 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8941 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8942 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8943 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8944 uInfo & 0x8000 ? "recompiled" : "todo");
8945 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8946 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8947 else
8948 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8949 }
8950 else
8951# endif
8952 {
8953# ifdef RT_ARCH_AMD64
8954 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8955 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8956 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8957 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8958# elif defined(RT_ARCH_ARM64)
8959 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8960 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8961 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8962# else
8963# error "Port me"
8964# endif
8965 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8966 }
8967 }
8968 else
8969 {
8970# if defined(RT_ARCH_AMD64)
8971 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8972 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8973# else
8974 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8975# endif
8976 cbInstr = sizeof(paNative[0]);
8977 }
8978 offNative += cbInstr / sizeof(paNative[0]);
8979
8980# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8981 cs_insn *pInstr;
8982 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8983 (uintptr_t)pNativeCur, 1, &pInstr);
8984 if (cInstrs > 0)
8985 {
8986 Assert(cInstrs == 1);
8987# if defined(RT_ARCH_AMD64)
8988 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8989 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8990# else
8991 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8992 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8993# endif
8994 offNative += pInstr->size / sizeof(*pNativeCur);
8995 cs_free(pInstr, cInstrs);
8996 }
8997 else
8998 {
8999# if defined(RT_ARCH_AMD64)
9000 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9001 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9002# else
9003 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9004# endif
9005 offNative++;
9006 }
9007# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9008 }
9009 }
9010
9011#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9012 /* Cleanup. */
9013 cs_close(&hDisasm);
9014#endif
9015}
9016
9017
9018/**
9019 * Recompiles the given threaded TB into a native one.
9020 *
9021 * In case of failure the translation block will be returned as-is.
9022 *
9023 * @returns pTb.
9024 * @param pVCpu The cross context virtual CPU structure of the calling
9025 * thread.
9026 * @param pTb The threaded translation to recompile to native.
9027 */
9028DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9029{
9030#if 0 /* For profiling the native recompiler code. */
9031l_profile_again:
9032#endif
9033 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9034
9035 /*
9036 * The first time thru, we allocate the recompiler state, the other times
9037 * we just need to reset it before using it again.
9038 */
9039 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9040 if (RT_LIKELY(pReNative))
9041 iemNativeReInit(pReNative, pTb);
9042 else
9043 {
9044 pReNative = iemNativeInit(pVCpu, pTb);
9045 AssertReturn(pReNative, pTb);
9046 }
9047
9048#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9049 /*
9050 * First do liveness analysis. This is done backwards.
9051 */
9052 {
9053 uint32_t idxCall = pTb->Thrd.cCalls;
9054 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9055 { /* likely */ }
9056 else
9057 {
9058 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9059 while (idxCall > cAlloc)
9060 cAlloc *= 2;
9061 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9062 AssertReturn(pvNew, pTb);
9063 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9064 pReNative->cLivenessEntriesAlloc = cAlloc;
9065 }
9066 AssertReturn(idxCall > 0, pTb);
9067 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9068
9069 /* The initial (final) entry. */
9070 idxCall--;
9071 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9072
9073 /* Loop backwards thru the calls and fill in the other entries. */
9074 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
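     /* Walk the calls in reverse execution order: each liveness function consumes the
        already-initialized entry at idxCall and fills in the one at idxCall - 1, so
        knowledge of which guest registers are still needed flows from the end of the
        TB back towards its start. */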
9075 while (idxCall > 0)
9076 {
9077 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9078 if (pfnLiveness)
9079 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9080 else
9081 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9082 pCallEntry--;
9083 idxCall--;
9084 }
9085
9086# ifdef VBOX_WITH_STATISTICS
9087 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9088 to 'clobbered' rather than 'input'. */
9089 /** @todo */
9090# endif
9091 }
9092#endif
9093
9094 /*
9095 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9096 * for aborting if an error happens.
9097 */
9098 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9099#ifdef LOG_ENABLED
9100 uint32_t const cCallsOrg = cCallsLeft;
9101#endif
9102 uint32_t off = 0;
9103 int rc = VINF_SUCCESS;
9104 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9105 {
9106 /*
9107 * Emit prolog code (fixed).
9108 */
9109 off = iemNativeEmitProlog(pReNative, off);
9110
9111 /*
9112 * Convert the calls to native code.
9113 */
9114#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9115 int32_t iGstInstr = -1;
9116#endif
9117#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9118 uint32_t cThreadedCalls = 0;
9119 uint32_t cRecompiledCalls = 0;
9120#endif
9121#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9122 uint32_t idxCurCall = 0;
9123#endif
9124 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9125 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9126 while (cCallsLeft-- > 0)
9127 {
9128 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9129#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9130 pReNative->idxCurCall = idxCurCall;
9131#endif
9132
9133 /*
9134 * Debug info, assembly markup and statistics.
9135 */
9136#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9137 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9138 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9139#endif
9140#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9141 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9142 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9143 {
9144 if (iGstInstr < (int32_t)pTb->cInstructions)
9145 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9146 else
9147 Assert(iGstInstr == pTb->cInstructions);
9148 iGstInstr = pCallEntry->idxInstr;
9149 }
9150 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9151#endif
9152#if defined(VBOX_STRICT)
9153 off = iemNativeEmitMarker(pReNative, off,
9154 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9155#endif
9156#if defined(VBOX_STRICT)
9157 iemNativeRegAssertSanity(pReNative);
9158#endif
9159#ifdef VBOX_WITH_STATISTICS
9160 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9161#endif
9162
9163 /*
9164 * Actual work.
9165 */
9166 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9167 pfnRecom ? "(recompiled)" : "(todo)"));
9168 if (pfnRecom) /** @todo stats on this. */
9169 {
9170 off = pfnRecom(pReNative, off, pCallEntry);
9171 STAM_REL_STATS({cRecompiledCalls++;});
9172 }
9173 else
9174 {
9175 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9176 STAM_REL_STATS({cThreadedCalls++;});
9177 }
9178 Assert(off <= pReNative->cInstrBufAlloc);
9179 Assert(pReNative->cCondDepth == 0);
9180
9181#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9182 if (LogIs2Enabled())
9183 {
9184 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9185# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9186 static const char s_achState[] = "CUXI";
9187# else
9188 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9189# endif
9190
9191 char szGpr[17];
9192 for (unsigned i = 0; i < 16; i++)
9193 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9194 szGpr[16] = '\0';
9195
9196 char szSegBase[X86_SREG_COUNT + 1];
9197 char szSegLimit[X86_SREG_COUNT + 1];
9198 char szSegAttrib[X86_SREG_COUNT + 1];
9199 char szSegSel[X86_SREG_COUNT + 1];
9200 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9201 {
9202 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9203 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9204 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9205 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9206 }
9207 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9208 = szSegSel[X86_SREG_COUNT] = '\0';
9209
9210 char szEFlags[8];
9211 for (unsigned i = 0; i < 7; i++)
9212 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9213 szEFlags[7] = '\0';
9214
9215 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9216 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9217 }
9218#endif
9219
9220 /*
9221 * Advance.
9222 */
9223 pCallEntry++;
9224#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9225 idxCurCall++;
9226#endif
9227 }
9228
9229 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9230 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9231 if (!cThreadedCalls)
9232 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9233
9234#ifdef VBOX_WITH_STATISTICS
9235 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
9236 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
9237 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2,
9238 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
9239 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
9240 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
9241#endif
9242
9243 /*
9244 * Emit the epilog code.
9245 */
9246 uint32_t idxReturnLabel;
9247 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9248
9249 /*
9250 * Generate special jump labels.
9251 */
9252 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9253 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9254 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9255 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9256
9257 /*
9258 * Generate simple TB tail labels that just call a helper with a pVCpu
9259 * arg and either return or longjmp/throw a non-zero status.
9260 *
9261 * The array entries must be ordered by enmLabel value so we can index
9262 * using fTailLabels bit numbers.
9263 */
9264 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9265 static struct
9266 {
9267 IEMNATIVELABELTYPE enmLabel;
9268 uint32_t offVCpuStats;
9269 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9270 } const g_aSimpleTailLabels[] =
9271 {
9272 { kIemNativeLabelType_Invalid, 0, NULL },
9273 { kIemNativeLabelType_RaiseDe, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseDe), iemNativeHlpExecRaiseDe },
9274 { kIemNativeLabelType_RaiseUd, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseUd), iemNativeHlpExecRaiseUd },
9275 { kIemNativeLabelType_RaiseSseRelated, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseSseRelated), iemNativeHlpExecRaiseSseRelated },
9276 { kIemNativeLabelType_RaiseAvxRelated, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseAvxRelated), iemNativeHlpExecRaiseAvxRelated },
9277 { kIemNativeLabelType_RaiseSseAvxFpRelated, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseSseAvxFpRelated), iemNativeHlpExecRaiseSseAvxFpRelated },
9278 { kIemNativeLabelType_RaiseNm, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseNm), iemNativeHlpExecRaiseNm },
9279 { kIemNativeLabelType_RaiseGp0, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseGp0), iemNativeHlpExecRaiseGp0 },
9280 { kIemNativeLabelType_RaiseMf, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseMf), iemNativeHlpExecRaiseMf },
9281 { kIemNativeLabelType_RaiseXf, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitRaiseXf), iemNativeHlpExecRaiseXf },
9282 { kIemNativeLabelType_ObsoleteTb, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitObsoleteTb), iemNativeHlpObsoleteTb },
9283 { kIemNativeLabelType_NeedCsLimChecking, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitNeedCsLimChecking), iemNativeHlpNeedCsLimChecking },
9284 { kIemNativeLabelType_CheckBranchMiss, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbExitCheckBranchMiss), iemNativeHlpCheckBranchMiss },
9285 };
9286 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9287 AssertCompile(kIemNativeLabelType_Invalid == 0);
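     /* This mask keeps label-type bits 1 .. LastSimple; subtracting 2 rather than 1
        also clears bit 0 (kIemNativeLabelType_Invalid). */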
9288 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9289 if (fTailLabels)
9290 {
9291#ifdef VBOX_WITH_STATISTICS
9292 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
9293 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
9294#endif
9295
9296 do
9297 {
9298 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9299 fTailLabels &= ~RT_BIT_64(enmLabel);
9300 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9301
9302 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9303 Assert(idxLabel != UINT32_MAX);
9304 if (idxLabel != UINT32_MAX)
9305 {
9306 iemNativeLabelDefine(pReNative, idxLabel, off);
9307
9308#ifdef VBOX_WITH_STATISTICS
9309 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2,
9310 g_aSimpleTailLabels[enmLabel].offVCpuStats);
9311#endif
9312
9313 /* int pfnCallback(PVMCPUCC pVCpu) */
9314 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9315 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9316
9317 /* jump back to the return sequence. */
9318 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9319 }
9320
9321 } while (fTailLabels);
9322
9323#ifdef VBOX_WITH_STATISTICS
9324 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
9325 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
9326#endif
9327 }
9328 }
9329 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9330 {
9331 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9332 return pTb;
9333 }
9334 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9335 Assert(off <= pReNative->cInstrBufAlloc);
9336
9337 /*
9338 * Make sure all labels have been defined.
9339 */
9340 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9341#ifdef VBOX_STRICT
9342 uint32_t const cLabels = pReNative->cLabels;
9343 for (uint32_t i = 0; i < cLabels; i++)
9344 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9345#endif
9346
9347#if 0 /* For profiling the native recompiler code. */
9348 if (pTb->Thrd.cCalls >= 136)
9349 {
9350 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9351 goto l_profile_again;
9352 }
9353#endif
9354
9355 /*
9356 * Allocate executable memory, copy over the code we've generated.
9357 */
9358 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9359 if (pTbAllocator->pDelayedFreeHead)
9360 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9361
9362 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
9363 AssertReturn(paFinalInstrBuf, pTb);
9364 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9365
9366 /*
9367 * Apply fixups.
9368 */
9369 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9370 uint32_t const cFixups = pReNative->cFixups;
9371 for (uint32_t i = 0; i < cFixups; i++)
9372 {
9373 Assert(paFixups[i].off < off);
9374 Assert(paFixups[i].idxLabel < cLabels);
9375 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9376 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9377 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9378 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9379 switch (paFixups[i].enmType)
9380 {
9381#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9382 case kIemNativeFixupType_Rel32:
9383 Assert(paFixups[i].off + 4 <= off);
9384 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9385 continue;
9386
9387#elif defined(RT_ARCH_ARM64)
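     /* The ARM64 fixups patch PC-relative immediates expressed in units of 32-bit
        instructions: imm26 at bit 0 (B/BL), imm19 at bits 5..23 (B.cond, CBZ/CBNZ and
        LDR literal) and imm14 at bits 5..18 (TBZ/TBNZ). */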
9388 case kIemNativeFixupType_RelImm26At0:
9389 {
9390 Assert(paFixups[i].off < off);
9391 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9392 Assert(offDisp >= -262144 && offDisp < 262144);
9393 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9394 continue;
9395 }
9396
9397 case kIemNativeFixupType_RelImm19At5:
9398 {
9399 Assert(paFixups[i].off < off);
9400 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9401 Assert(offDisp >= -262144 && offDisp < 262144);
9402 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9403 continue;
9404 }
9405
9406 case kIemNativeFixupType_RelImm14At5:
9407 {
9408 Assert(paFixups[i].off < off);
9409 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9410 Assert(offDisp >= -8192 && offDisp < 8192);
9411 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9412 continue;
9413 }
9414
9415#endif
9416 case kIemNativeFixupType_Invalid:
9417 case kIemNativeFixupType_End:
9418 break;
9419 }
9420 AssertFailed();
9421 }
9422
9423 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9424 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9425
9426 /*
9427 * Convert the translation block.
9428 */
9429 RTMemFree(pTb->Thrd.paCalls);
9430 pTb->Native.paInstructions = paFinalInstrBuf;
9431 pTb->Native.cInstructions = off;
9432 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9433#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9434 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9435 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9436#endif
9437
9438 Assert(pTbAllocator->cThreadedTbs > 0);
9439 pTbAllocator->cThreadedTbs -= 1;
9440 pTbAllocator->cNativeTbs += 1;
9441 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9442
9443#ifdef LOG_ENABLED
9444 /*
9445 * Disassemble to the log if enabled.
9446 */
9447 if (LogIs3Enabled())
9448 {
9449 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9450 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9451# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9452 RTLogFlush(NULL);
9453# endif
9454 }
9455#endif
9456 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9457
9458 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9459 return pTb;
9460}
9461