VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veRecompFuncs-x86.h@ 108296

Last change on this file: r108296, checked in by vboxsync on 2025-02-19

VMM/IEM: s/IEM_MC_STORE_MEM_(?!FLAT|SEG|BY)/IEM_MC_STORE_MEM_SEG_\2/g jiraref:VBP-1531

1/* $Id: IEMAllN8veRecompFuncs-x86.h 108296 2025-02-19 14:44:11Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits, x86 target.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#ifdef IN_RING0
38# define VBOX_VMM_TARGET_X86
39#endif
40#include <VBox/vmm/iem.h>
41#include <VBox/vmm/cpum.h>
42#include <VBox/vmm/dbgf.h>
43#include "IEMInternal.h"
44#include <VBox/vmm/vmcc.h>
45#include <VBox/log.h>
46#include <VBox/err.h>
47#include <VBox/dis.h>
48#include <VBox/param.h>
49#include <iprt/assert.h>
50#include <iprt/heap.h>
51#include <iprt/mem.h>
52#include <iprt/string.h>
53#if defined(RT_ARCH_AMD64)
54# include <iprt/x86.h>
55#elif defined(RT_ARCH_ARM64)
56# include <iprt/armv8.h>
57#endif
58
59#include "IEMInline.h"
60#include "IEMThreadedFunctions.h"
61#include "IEMN8veRecompiler.h"
62#include "IEMN8veRecompilerEmit.h"
63#include "IEMN8veRecompilerTlbLookup.h"
64#include "IEMNativeFunctions.h"
65#include "VMMAll/target-x86/IEMAllN8veEmit-x86.h"
66
67
68/*
69 * Narrow down configs here to avoid wasting time on unused configs.
70 * Note! Same checks in IEMAllThrdRecompiler.cpp.
71 */
72
73#ifndef IEM_WITH_CODE_TLB
74# error The code TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_DATA_TLB
78# error The data TLB must be enabled for the recompiler.
79#endif
80
81
82/*********************************************************************************************************************************
83* Code emitters for flushing pending guest register writes and sanity checks *
84*********************************************************************************************************************************/
85
86#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
87
88# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
89/**
90 * Updates IEMCPU::uPcUpdatingDebug.
91 */
92DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
93{
94# ifdef RT_ARCH_AMD64
95 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
96 {
97 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
98 if ((int32_t)offDisp == offDisp || cBits != 64)
99 {
100 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
101 if (cBits == 64)
102 pCodeBuf[off++] = X86_OP_REX_W;
103 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
104 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
105 if ((int8_t)offDisp == offDisp)
106 pCodeBuf[off++] = (int8_t)offDisp;
107 else
108 {
109 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
110 off += sizeof(int32_t);
111 }
112 }
113 else
114 {
115 /* mov tmp0, imm64 */
116 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
117
118 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
119 if (cBits == 64)
120 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
121 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
122 pCodeBuf[off++] = X86_OP_REX_R;
123 pCodeBuf[off++] = 0x01;
124 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
125 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
126 }
127 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
128 return off;
129 }
130# endif
131
132 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
133 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
134
135 if (pReNative->Core.fDebugPcInitialized)
136 {
137 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
138 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
139 }
140 else
141 {
142 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
143 pReNative->Core.fDebugPcInitialized = true;
144 off = iemNativeEmitLoadGprWithGstRegExT<kIemNativeGstReg_Pc>(pCodeBuf, off, idxTmpReg);
145 }
146
147 if (cBits == 64)
148 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
149 else
150 {
151 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
152 if (cBits == 16)
153 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
154 }
155
156 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
157 IEMNATIVE_REG_FIXED_TMP0);
158
159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
160 iemNativeRegFreeTmp(pReNative, idxTmpReg);
161 return off;
162}
163
164
165# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
166DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
167{
168 /* Compare the shadow with the context value, they should match. */
169 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
170 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
171 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
172 return off;
173}
174# endif
175
176#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
177
178/**
179 * Flushes delayed write of a specific guest register.
180 *
181 * This must be called prior to calling CImpl functions and any helpers that use
182 * the guest state (like raising exceptions) and such.
183 *
184 * This optimization has not yet been implemented. The first target would be
185 * RIP updates, since these are the most common ones.
186 */
187template<IEMNATIVEGSTREGREF a_enmClass>
188DECL_INLINE_THROW(uint32_t)
189iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
190{
191#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
192 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
193#endif
194
195#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
196# if 0 /** @todo r=aeichner EFLAGS writeback delay. */
197 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_EFlags)
198 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
199 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
200# else
201 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
202# endif
203
204 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_Gpr)
205 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
206 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
207#endif
208
209 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_XReg)
210 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
211 {
212 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
213 /* Flush the shadows as the register needs to be reloaded (there is no
214 guarantee right now that the referenced register doesn't change). */
215 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
216
217 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
218 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
219 }
220
221 return off;
222}
223
224
225
226/*********************************************************************************************************************************
227* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
228*********************************************************************************************************************************/
229
230#undef IEM_MC_BEGIN /* unused */
231#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
232 { \
233 Assert(pReNative->Core.bmVars == 0); \
234 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
235 Assert(pReNative->Core.bmStack == 0); \
236 pReNative->fMc = (a_fMcFlags); \
237 pReNative->fCImpl = (a_fCImplFlags); \
238 pReNative->cArgsX = (a_cArgsIncludingHidden)
239
240/** We have to get to the end in recompilation mode, as otherwise we won't
241 * generate code for all the IEM_MC_IF_XXX branches. */
242#define IEM_MC_END() \
243 iemNativeVarFreeAll(pReNative); \
244 } return off
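
/* Illustrative expansion (sketch added for clarity, not part of the generated
 * code): a recompiled MC block of the form
 *      IEM_MC_BEGIN_EX(fMcFlags, fCImplFlags, 2);
 *      ...
 *      IEM_MC_END();
 * therefore reduces to roughly
 *      {
 *          Assert(pReNative->Core.bmVars == 0);
 *          Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
 *          Assert(pReNative->Core.bmStack == 0);
 *          pReNative->fMc    = fMcFlags;
 *          pReNative->fCImpl = fCImplFlags;
 *          pReNative->cArgsX = 2;
 *          ...
 *          iemNativeVarFreeAll(pReNative);
 *      } return off;
 * i.e. the emitter body becomes one compound statement that advances 'off'
 * and finally returns it. */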
245
246
247
248/*********************************************************************************************************************************
249* Liveness Stubs *
250*********************************************************************************************************************************/
251
252#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
253#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
254#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
255
256#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
257#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
258#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
259
260#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
261#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
262#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
263
264#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
265#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
266#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
267
268
269/*********************************************************************************************************************************
270* Native Emitter Support. *
271*********************************************************************************************************************************/
272
273#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
274
275#define IEM_MC_NATIVE_ELSE() } else {
276
277#define IEM_MC_NATIVE_ENDIF() } ((void)0)
278
279
280#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
281 off = a_fnEmitter(pReNative, off)
282
283#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
284 off = a_fnEmitter(pReNative, off, (a0))
285
286#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
287 off = a_fnEmitter(pReNative, off, (a0), (a1))
288
289#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
290 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
291
292#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
293 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
294
295#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
296 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
297
298#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
299 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
300
301#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
302 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
303
304#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
305 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
306
307#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
308 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
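
/* Usage sketch (an assumption for illustration; iemNativeEmit_SomeOp is a
 * placeholder emitter name, not a function defined in this file):
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64) {
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_SomeOp, idxVarDst, idxVarSrc);
 *      } IEM_MC_NATIVE_ELSE() {
 *          ... fall back to the threaded/C implementation ...
 *      } IEM_MC_NATIVE_ENDIF();
 * With the definitions above this becomes an if/else on the compile-time
 * constant RT_ARCH_VAL, so only the branch matching the host architecture
 * actually emits code. */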
309
310
311#ifndef RT_ARCH_AMD64
312# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
313#else
314/** @note This is a naive approach that ASSUMES that the register isn't
315 * allocated, so it only works safely for the first allocation(s) in
316 * a MC block. */
317# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
318 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
319
320DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
321 uint32_t off, bool fAllocated);
322
323DECL_INLINE_THROW(uint32_t)
324iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
325{
326 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
327 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
328 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
329
330# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
331 /* Must flush the register if it holds pending writes. */
332 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
333 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
334 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
335# endif
336
337 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
338 return off;
339}
340
341#endif /* RT_ARCH_AMD64 */
342
343
344
345/*********************************************************************************************************************************
346* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
347*********************************************************************************************************************************/
348
349#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
350 pReNative->fMc = 0; \
351 pReNative->fCImpl = (a_fFlags); \
352 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
353 a_cbInstr) /** @todo not used ... */
354
355
356#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
357 pReNative->fMc = 0; \
358 pReNative->fCImpl = (a_fFlags); \
359 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
360
361DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
362 uint8_t idxInstr, uint64_t a_fGstShwFlush,
363 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
364{
365 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
366}
367
368
369#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
370 pReNative->fMc = 0; \
371 pReNative->fCImpl = (a_fFlags); \
372 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
373 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
374
375DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
376 uint8_t idxInstr, uint64_t a_fGstShwFlush,
377 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
378{
379 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
380}
381
382
383#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
384 pReNative->fMc = 0; \
385 pReNative->fCImpl = (a_fFlags); \
386 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
387 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
388
389DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
390 uint8_t idxInstr, uint64_t a_fGstShwFlush,
391 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
392 uint64_t uArg2)
393{
394 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
395}
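
/* Illustrative expansion (sketch for clarity; iemCImpl_SomeFn is a placeholder
 * name): a deferral such as
 *      IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(cbInstr, fFlags, fGstShwFlush, iemCImpl_SomeFn, uArg0)
 * reduces to
 *      pReNative->fMc    = 0;
 *      pReNative->fCImpl = fFlags;
 *      return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, fGstShwFlush,
 *                                     (uintptr_t)iemCImpl_SomeFn, cbInstr, uArg0);
 * and all three wrappers funnel into iemNativeEmitCImplCall with the unused
 * argument slots passed as zero. */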
396
397
398
399/*********************************************************************************************************************************
400* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_PC_AND_FINISH_XXX) *
401*********************************************************************************************************************************/
402
403/** Emits the flags check for IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC64_WITH_FLAGS
404 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
405DECL_INLINE_THROW(uint32_t)
406iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
407{
408 /*
409 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
410 * return with a special status code and make the execution loop deal with
411 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
412 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
413 * could continue w/o interruption, it probably will drop into the
414 * debugger, so it is not worth the effort of trying to service it here;
415 * we just lump it in with the handling of the others.
416 *
417 * To simplify the code and the register state management even more (wrt
418 * the immediate in the AND operation), we always update the flags and skip
419 * the conditional jump associated with the extra check.
420 */
421 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
422 <= UINT32_MAX);
423#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
424 AssertMsg( pReNative->idxCurCall == 0
425 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
426 IEMLIVENESSBIT_IDX_EFL_OTHER)),
427 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
428 IEMLIVENESSBIT_IDX_EFL_OTHER)));
429#endif
430
431 /*
432 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
433 * any pending register writes must be flushed.
434 */
435 off = iemNativeRegFlushPendingWrites(pReNative, off);
436
437 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
438 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
439 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
440 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
441 X86_EFL_TF
442 | CPUMCTX_DBG_HIT_DRX_MASK
443 | CPUMCTX_DBG_DBGF_MASK);
444 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
445 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
446
447 /* Free but don't flush the EFLAGS register. */
448 iemNativeRegFreeTmp(pReNative, idxEflReg);
449
450 return off;
451}
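
/* Rough shape of the code generated by the function above (sketch; the exact
 * instruction sequence depends on the host architecture and emitter helpers):
 *      eflReg = <host register shadowing guest EFLAGS>
 *      if (eflReg & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          goto ReturnWithFlags;                            // TB exit
 *      eflReg &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      <store eflReg back to the guest EFLAGS register> */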
452
453
454/** Helper for iemNativeEmitFinishInstructionWithStatus. */
455DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
456{
457 unsigned const offOpcodes = pCallEntry->offOpcode;
458 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
459 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
460 {
461 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
462 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
463 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
464 }
465 AssertFailedReturn(NIL_RTGCPHYS);
466}
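
/* Worked example with made-up numbers: if aRanges[1] starts at offOpcodes
 * 0x10, spans cbOpcodes 0x20 bytes and begins at offPhysPage 0xf00 of its
 * physical page, then a call entry with offOpcode 0x18 gives
 *      offRange = 0x18 - 0x10 = 0x08
 *      GCPhysPc = iemTbGetRangePhysPageAddr(pTb, 1) + 0x08 + 0xf00
 * i.e. the page address plus the range's offset into that page plus the
 * offset into the range. */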
467
468
469/** The VINF_SUCCESS dummy. */
470template<int const a_rcNormal, bool const a_fIsJump>
471DECL_FORCE_INLINE_THROW(uint32_t)
472iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
473 int32_t const offJump)
474{
475 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
476 if (a_rcNormal != VINF_SUCCESS)
477 {
478#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
479 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
480#else
481 RT_NOREF_PV(pCallEntry);
482#endif
483
484 /* As this code returns from the TB any pending register writes must be flushed. */
485 off = iemNativeRegFlushPendingWrites(pReNative, off);
486
487 /*
488 * If we're in a conditional, mark the current branch as exiting so we
489 * can disregard its state when we hit the IEM_MC_ENDIF.
490 */
491 iemNativeMarkCurCondBranchAsExiting(pReNative);
492
493 /*
494 * Use the lookup table for getting to the next TB quickly.
495 * Note! In this code path there can only be one entry at present.
496 */
497 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
498 PCIEMTB const pTbOrg = pReNative->pTbOrg;
499 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
500 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
501
502#if 0
503 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
504 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
506 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
507 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
508
509 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
510
511#else
512 /* Load the index as argument #1 for the helper call at the given label. */
513 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
514
515 /*
516 * Figure out the physical address of the current instruction and see
517 * whether the next instruction we're about to execute is in the same
518 * page so we can optimistically skip TLB loading.
519 *
520 * - This is safe for all cases in FLAT mode.
521 * - In segmented modes it is complicated, given that a negative
522 * jump may underflow EIP and a forward jump may overflow or run into
523 * CS.LIM, triggering a #GP. The only thing we can get away with
524 * now at compile time is forward jumps w/o CS.LIM checks, since the
525 * lack of CS.LIM checks means we're good for the entire physical page
526 * we're executing on and another 15 bytes before we run into CS.LIM.
527 */
528 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
529# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
530 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
531# endif
532 )
533 {
534 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
535 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
536 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
537 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
538
539 {
540 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
541 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
542
543 /* Load the key lookup flags into the 2nd argument for the helper call.
544 - This is safe wrt CS limit checking since we're only here for FLAT modes.
545 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
546 interrupt shadow.
547 - The NMI inhibiting is more questionable, though... */
548 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
549 * Should we copy it into fExec to simplify this? OTOH, it's just a
550 * couple of extra instructions if EFLAGS are already in a register. */
551 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
552 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
553
554 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
555 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
556 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
557 }
558 }
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
562#endif
563 }
564 return off;
565}
566
567
568#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
569 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
570 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
571
572#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
573 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
574 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577/** Same as iemRegAddToRip64AndFinishingNoFlags. */
578DECL_INLINE_THROW(uint32_t)
579iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
580{
581#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
582# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
583 if (!pReNative->Core.offPc)
584 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
585# endif
586
587 /* Allocate a temporary PC register. */
588 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
589
590 /* Perform the addition and store the result. */
591 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
592 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
593
594 /* Free but don't flush the PC register. */
595 iemNativeRegFreeTmp(pReNative, idxPcReg);
596#endif
597
598#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
599 pReNative->Core.offPc += cbInstr;
600 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
601# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
602 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
603 off = iemNativeEmitPcDebugCheck(pReNative, off);
604# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
605 off = iemNativePcAdjustCheck(pReNative, off);
606# endif
607 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
608#endif
609
610 return off;
611}
612
613
614#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
615 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
617
618#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
620 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623/** Same as iemRegAddToEip32AndFinishingNoFlags. */
624DECL_INLINE_THROW(uint32_t)
625iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
626{
627#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
628# ifdef IEMNATIVE_REG_FIXED_PC_DBG
629 if (!pReNative->Core.offPc)
630 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
631# endif
632
633 /* Allocate a temporary PC register. */
634 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
635
636 /* Perform the addition and store the result. */
637 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
638 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
639
640 /* Free but don't flush the PC register. */
641 iemNativeRegFreeTmp(pReNative, idxPcReg);
642#endif
643
644#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
645 pReNative->Core.offPc += cbInstr;
646 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
647# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
648 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
649 off = iemNativeEmitPcDebugCheck(pReNative, off);
650# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
651 off = iemNativePcAdjustCheck(pReNative, off);
652# endif
653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
654#endif
655
656 return off;
657}
658
659
660#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
661 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
662 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
663
664#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
665 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669/** Same as iemRegAddToIp16AndFinishingNoFlags. */
670DECL_INLINE_THROW(uint32_t)
671iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
672{
673#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
674# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
675 if (!pReNative->Core.offPc)
676 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
677# endif
678
679 /* Allocate a temporary PC register. */
680 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
681
682 /* Perform the addition and store the result. */
683 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
684 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
685 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
686
687 /* Free but don't flush the PC register. */
688 iemNativeRegFreeTmp(pReNative, idxPcReg);
689#endif
690
691#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
692 pReNative->Core.offPc += cbInstr;
693 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
694# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
695 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
696 off = iemNativeEmitPcDebugCheck(pReNative, off);
697# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
698 off = iemNativePcAdjustCheck(pReNative, off);
699# endif
700 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
701#endif
702
703 return off;
704}
705
706
707/*********************************************************************************************************************************
708* Common code for changing PC/RIP/EIP/IP. *
709*********************************************************************************************************************************/
710
711/**
712 * Emits code to check if the content of @a idxAddrReg is a canonical address,
713 * raising a \#GP(0) if it isn't.
714 *
715 * @returns New code buffer offset, UINT32_MAX on failure.
716 * @param pReNative The native recompile state.
717 * @param off The code buffer offset.
718 * @param idxAddrReg The host register with the address to check.
719 * @param idxInstr The current instruction.
720 */
721DECL_FORCE_INLINE_THROW(uint32_t)
722iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
723{
724 /*
725 * Make sure we don't have any outstanding guest register writes as we may
726 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
727 */
728 off = iemNativeRegFlushPendingWrites(pReNative, off);
729
730#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
731 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
732#else
733 RT_NOREF(idxInstr);
734#endif
735
736#ifdef RT_ARCH_AMD64
737 /*
738 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
739 * return raisexcpt();
740 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
741 */
742 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
743
744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
745 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
746 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
747 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
748 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
749
750 iemNativeRegFreeTmp(pReNative, iTmpReg);
751
752#elif defined(RT_ARCH_ARM64)
753 /*
754 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
755 * return raisexcpt();
756 * ----
757 * mov x1, 0x800000000000
758 * add x1, x0, x1
759 * cmp xzr, x1, lsr 48
760 * b.ne .Lraisexcpt
761 */
762 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
763
764 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
765 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
766 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
767 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
768
769 iemNativeRegFreeTmp(pReNative, iTmpReg);
770
771#else
772# error "Port me"
773#endif
774 return off;
775}
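
/* Why the trick above works (informal note): for a canonical address bits
 * 63:47 are all equal, so the top 32 bits are either in the range
 * [0x00000000, 0x00007fff] (bit 47 clear) or [0xffff8000, 0xffffffff]
 * (bit 47 set).  Adding 0x8000 with 32-bit wrap-around maps both ranges into
 * [0x0000, 0xffff], making the final >> 16 yield zero; any non-canonical
 * address falls outside that window, the shifted value is non-zero and the
 * RaiseGp0 exit is taken. */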
776
777
778/**
779 * Emits code to check if the content of @a idxAddrReg is a canonical address,
780 * raising a \#GP(0) if it isn't.
781 *
782 * Caller makes sure everything is flushed, except maybe PC.
783 *
784 * @returns New code buffer offset, UINT32_MAX on failure.
785 * @param pReNative The native recompile state.
786 * @param off The code buffer offset.
787 * @param idxAddrReg The host register with the address to check.
788 * @param offDisp The relative displacement that has already been
789 * added to idxAddrReg and must be subtracted if
790 * raising a \#GP(0).
791 * @param idxInstr The current instruction.
792 */
793DECL_FORCE_INLINE_THROW(uint32_t)
794iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
795 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
796{
797#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
798 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
799#endif
800
801#ifdef RT_ARCH_AMD64
802 /*
803 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
804 * return raisexcpt();
805 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
806 */
807 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
808
809 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
810 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
811 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
812 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
813
814#elif defined(RT_ARCH_ARM64)
815 /*
816 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
817 * return raisexcpt();
818 * ----
819 * mov x1, 0x800000000000
820 * add x1, x0, x1
821 * cmp xzr, x1, lsr 48
822 * b.ne .Lraisexcpt
823 */
824 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
825
826 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
827 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
828 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
829#else
830# error "Port me"
831#endif
832
833 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
834 uint32_t const offFixup1 = off;
835 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
836
837 /* jump .Lnoexcept; Skip the #GP code. */
838 uint32_t const offFixup2 = off;
839 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
840
841 /* .Lraisexcpt: */
842 iemNativeFixupFixedJump(pReNative, offFixup1, off);
843#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
844 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
845#else
846 RT_NOREF(idxInstr);
847#endif
848
849 /* Undo the PC adjustment and store the old PC value. */
850 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
851 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxAddrReg);
852
853 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
854
855 /* .Lnoexcept: */
856 iemNativeFixupFixedJump(pReNative, offFixup2, off);
857
858 iemNativeRegFreeTmp(pReNative, iTmpReg);
859 return off;
860}
861
862
863/**
864 * Emits code to check if the content of @a idxAddrReg is a canonical address,
865 * raising a \#GP(0) if it isn't.
866 *
867 * Caller makes sure everything is flushed, except maybe PC.
868 *
869 * @returns New code buffer offset, UINT32_MAX on failure.
870 * @param pReNative The native recompile state.
871 * @param off The code buffer offset.
872 * @param idxAddrReg The host register with the address to check.
873 * @param idxOldPcReg Register holding the old PC that offPc is relative
874 * to if available, otherwise UINT8_MAX.
875 * @param idxInstr The current instruction.
876 */
877DECL_FORCE_INLINE_THROW(uint32_t)
878iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
879 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
880{
881#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
882 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
883#endif
884
885#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
886# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
887 if (!pReNative->Core.offPc)
888# endif
889 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
890#else
891 RT_NOREF(idxInstr);
892#endif
893
894#ifdef RT_ARCH_AMD64
895 /*
896 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
897 * return raisexcpt();
898 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
899 */
900 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
901
902 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
903 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
904 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
905 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
906
907#elif defined(RT_ARCH_ARM64)
908 /*
909 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
910 * return raisexcpt();
911 * ----
912 * mov x1, 0x800000000000
913 * add x1, x0, x1
914 * cmp xzr, x1, lsr 48
915 * b.ne .Lraisexcpt
916 */
917 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
918
919 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
920 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
921 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
922#else
923# error "Port me"
924#endif
925
926#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
927 if (pReNative->Core.offPc)
928 {
929 /** @todo On x86, it is said that conditional jumps forward are statically
930 * predicted as not taken, so this isn't a very good construct.
931 * Investigate whether it makes sense to invert it and add another
932 * jump. Also, find out wtf the static predictor does here on arm! */
933 uint32_t const offFixup = off;
934 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
935
936 /* .Lraisexcpt: */
937# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
938 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
939# endif
940 /* We need to update cpum.GstCtx.rip. */
941 if (idxOldPcReg == UINT8_MAX)
942 {
943 idxOldPcReg = iTmpReg;
944 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
945 }
946 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
947 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
948
949 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
950 iemNativeFixupFixedJump(pReNative, offFixup, off);
951 }
952 else
953#endif
954 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
955
956 iemNativeRegFreeTmp(pReNative, iTmpReg);
957
958 return off;
959}
960
961
962/**
963 * Emits code to check that the content of @a idxAddrReg is within the limit
964 * of CS, raising a \#GP(0) if it isn't.
965 *
966 * @returns New code buffer offset; throws VBox status code on error.
967 * @param pReNative The native recompile state.
968 * @param off The code buffer offset.
969 * @param idxAddrReg The host register (32-bit) with the address to
970 * check.
971 * @param idxInstr The current instruction.
972 */
973DECL_FORCE_INLINE_THROW(uint32_t)
974iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
975 uint8_t idxAddrReg, uint8_t idxInstr)
976{
977 /*
978 * Make sure we don't have any outstanding guest register writes as we may
979 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
980 */
981 off = iemNativeRegFlushPendingWrites(pReNative, off);
982
983#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
984 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
985#else
986 RT_NOREF(idxInstr);
987#endif
988
989 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
990 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
991 kIemNativeGstRegUse_ReadOnly);
992
993 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
994 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
995
996 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
997 return off;
998}
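
/* The generated check is essentially (sketch):
 *      if ((uint32_t)idxAddrReg > CS.LIM)      // unsigned compare against the
 *          goto RaiseGp0;                      // shadowed CS limit, TB exit
 * i.e. a 32-bit compare followed by a branch-if-above to the #GP(0) path. */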
999
1000
1001
1002
1003/**
1004 * Emits code to check that the content of @a idxAddrReg is within the limit
1005 * of CS, raising a \#GP(0) if it isn't.
1006 *
1007 * Caller makes sure everything is flushed, except maybe PC.
1008 *
1009 * @returns New code buffer offset; throws VBox status code on error.
1010 * @param pReNative The native recompile state.
1011 * @param off The code buffer offset.
1012 * @param idxAddrReg The host register (32-bit) with the address to
1013 * check.
1014 * @param idxOldPcReg Register holding the old PC that offPc is relative
1015 * to if available, otherwise UINT8_MAX.
1016 * @param idxInstr The current instruction.
1017 */
1018DECL_FORCE_INLINE_THROW(uint32_t)
1019iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1020 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1021{
1022#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1023 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1024#endif
1025
1026#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1027# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1028 if (!pReNative->Core.offPc)
1029# endif
1030 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1031#else
1032 RT_NOREF(idxInstr);
1033#endif
1034
1035 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1036 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1037 kIemNativeGstRegUse_ReadOnly);
1038
1039 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1041 if (pReNative->Core.offPc)
1042 {
1043 uint32_t const offFixup = off;
1044 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1045
1046 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1047 if (idxOldPcReg == UINT8_MAX)
1048 {
1049 idxOldPcReg = idxAddrReg;
1050 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1051 }
1052 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1053 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1054# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1055 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1056# endif
1057 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1058 iemNativeFixupFixedJump(pReNative, offFixup, off);
1059 }
1060 else
1061#endif
1062 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1063
1064 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1065 return off;
1066}
1067
1068
1069/*********************************************************************************************************************************
1070* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1071*********************************************************************************************************************************/
1072
1073#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1074 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1075 (a_enmEffOpSize), pCallEntry->idxInstr); \
1076 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1082 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1083
1084#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1085 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1086 IEMMODE_16BIT, pCallEntry->idxInstr); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1093 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1094
1095#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1096 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1097 IEMMODE_64BIT, pCallEntry->idxInstr); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1104 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1105
1106
1107#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1108 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1109 (a_enmEffOpSize), pCallEntry->idxInstr); \
1110 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1116 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1117
1118#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1119 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1120 IEMMODE_16BIT, pCallEntry->idxInstr); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1127 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1128
1129#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1130 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1131 IEMMODE_64BIT, pCallEntry->idxInstr); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1138 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1139
1140/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1141 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1142 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1143template<bool const a_fWithinPage>
1144DECL_INLINE_THROW(uint32_t)
1145iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1146 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1147{
1148 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1149#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1150 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1151 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1152 {
1153 /* No #GP checking required, just update offPc and get on with it. */
1154 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1155# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1156 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1157# endif
1158 }
1159 else
1160#endif
1161 {
1162 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1163 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1164 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1165
1166 /* Allocate a temporary PC register. */
1167 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1168 kIemNativeGstRegUse_ForUpdate);
1169
1170 /* Perform the addition. */
1171 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1172
1173 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1174 {
1175 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1176 We can skip this if the target is within the same page. */
1177 if (!a_fWithinPage)
1178 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1179 (int64_t)offDisp + cbInstr, idxInstr);
1180 }
1181 else
1182 {
1183 /* Just truncate the result to 16-bit IP. */
1184 Assert(enmEffOpSize == IEMMODE_16BIT);
1185 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1186 }
1187
1188#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1189# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1190 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1191 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1192# endif
1193 /* Since we've already got the new PC value in idxPcReg, we can just as
1194 well write it out and reset offPc to zero. Otherwise, we'd need to use
1195 a copy of the shadow PC, which would cost another move instruction here. */
1196# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1197 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1198 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1199 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1200 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1201 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1202 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1203# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1204 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1205 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1206# endif
1207# endif
1208 pReNative->Core.offPc = 0;
1209#endif
1210
1211 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1212
1213 /* Free but don't flush the PC register. */
1214 iemNativeRegFreeTmp(pReNative, idxPcReg);
1215 }
1216 return off;
1217}
1218
1219
1220#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1221 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1222 (a_enmEffOpSize), pCallEntry->idxInstr); \
1223 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1229 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1230
1231#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1232 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1233 IEMMODE_16BIT, pCallEntry->idxInstr); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1240 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1241
1242#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1243 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1244 IEMMODE_32BIT, pCallEntry->idxInstr); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1251 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1252
1253
1254#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1255 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1256 (a_enmEffOpSize), pCallEntry->idxInstr); \
1257 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1263 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1264
1265#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1266 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1267 IEMMODE_16BIT, pCallEntry->idxInstr); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1274 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1275
1276#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1277 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1278 IEMMODE_32BIT, pCallEntry->idxInstr); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1285 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1286
1287/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1288 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1289 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1290template<bool const a_fFlat>
1291DECL_INLINE_THROW(uint32_t)
1292iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1293 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1294{
1295 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1296#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1297 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1298#endif
1299
1300 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1301 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1302 {
1303 off = iemNativeRegFlushPendingWrites(pReNative, off);
1304#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1305 Assert(pReNative->Core.offPc == 0);
1306#endif
1307 }
1308
1309 /* Allocate a temporary PC register. */
1310 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1311
1312 /* Perform the addition. */
1313#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1314 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1315#else
1316 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1317#endif
1318
1319 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1320 if (enmEffOpSize == IEMMODE_16BIT)
1321 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1322
1323 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1324 if (!a_fFlat)
1325 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1326
1327 /* Commit it. */
1328#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1329 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1330 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1331#endif
1332
1333 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1334#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1335 pReNative->Core.offPc = 0;
1336#endif
1337
1338 /* Free but don't flush the PC register. */
1339 iemNativeRegFreeTmp(pReNative, idxPcReg);
1340
1341 return off;
1342}
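/* Worked example (hypothetical values, for illustration only): with a 16-bit
   operand size, IP=0xFFFC, cbInstr=2 and offDisp=+8 the addition above yields
   0x00010006 and iemNativeEmitClear16UpGpr truncates it to 0x0006, matching the
   16-bit IP wrap-around; the CS.LIM check only runs for the !a_fFlat case. */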
1343
1344
1345#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1346 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1347 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1348
1349#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1350 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1351 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1357
1358#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1359 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1360 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1366
1367#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1368 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1369 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1373DECL_INLINE_THROW(uint32_t)
1374iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1375 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1376{
1377 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1378 off = iemNativeRegFlushPendingWrites(pReNative, off);
1379
1380#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1381 Assert(pReNative->Core.offPc == 0);
1382 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1383#endif
1384
1385 /* Allocate a temporary PC register. */
1386 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1387
1388 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1389 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1390 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1391 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1392#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1393 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1394 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1395#endif
1396 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1397
1398 /* Free but don't flush the PC register. */
1399 iemNativeRegFreeTmp(pReNative, idxPcReg);
1400
1401 return off;
1402}
1403
1404
1405
1406/*********************************************************************************************************************************
1407* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_IND_JMP_UXX_AND_FINISH).                                   *
1408*********************************************************************************************************************************/
1409
1410/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for pre-386 targets. */
1411#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1412 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1413
1414/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for 386+ targets. */
1415#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1416 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1417
1418/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for use in 64-bit code. */
1419#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1420 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1421
1422/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for pre-386 targets that checks and
1423 * clears flags. */
1424#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1425 IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1426 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1427
1428/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for 386+ targets that checks and
1429 * clears flags. */
1430#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1431 IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1432 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1433
1434/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for use in 64-bit code that checks and
1435 * clears flags. */
1436#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1437 IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1438 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1439
1440#undef IEM_MC_IND_JMP_U16_AND_FINISH
1441
1442
1443/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for 386+ targets. */
1444#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1445 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1446
1447/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for use in 64-bit code. */
1448#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1449 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1450
1451/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for 386+ targets that checks and
1452 * clears flags. */
1453#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1454 IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1455 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1456
1457/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for use in 64-bit code that checks
1458 * and clears flags. */
1459#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1460 IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1461 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1462
1463#undef IEM_MC_IND_JMP_U32_AND_FINISH
1464
1465
1466/** Variant of IEM_MC_IND_JMP_U64_AND_FINISH for use in 64-bit code. */
1467#define IEM_MC_IND_JMP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1468 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1469
1470/** Variant of IEM_MC_IND_JMP_U64_AND_FINISH for use in 64-bit code that checks
1471 * and clears flags. */
1472#define IEM_MC_IND_JMP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1473 IEM_MC_IND_JMP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1474 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1475
1476#undef IEM_MC_IND_JMP_U64_AND_FINISH
1477
1478
1479/** Same as iemRegRipJumpU16AndFinishNoFlags,
1480 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1481DECL_INLINE_THROW(uint32_t)
1482iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1483 uint8_t idxInstr, uint8_t cbVar)
1484{
1485 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1486 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1487
1488 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1489 PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1490 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
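    /* Explanatory note (not from the original source): only a 64-bit target taken
       from a 64-bit operand can end up non-canonical, and only a non-flat
       16/32-bit mode needs a CS.LIM check; a 16/32-bit operand in 64-bit mode
       zero-extends into the low canonical range and flat 32-bit mode has a 4 GiB
       limit, so neither can raise #GP(0) here. */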
1491 uint8_t const idxOldPcReg = fMayRaiseGp0
1492 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1493 : UINT8_MAX;
1494 if (fMayRaiseGp0)
1495 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1496
1497 /* Get a register with the new PC loaded from idxVarPc.
1498 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1499 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1500
1501 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1502 if (fMayRaiseGp0)
1503 {
1504 if (f64Bit)
1505 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1506 else
1507 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1508 }
1509
1510 /* Store the result. */
1511 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1512
1513#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1514 pReNative->Core.offPc = 0;
1515 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1516# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1518 pReNative->Core.fDebugPcInitialized = true;
1519 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1520# endif
1521#endif
1522
1523 if (idxOldPcReg != UINT8_MAX)
1524 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1525 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1526 /** @todo implicitly free the variable? */
1527
1528 return off;
1529}
1530
1531
1532
1533/*********************************************************************************************************************************
1534* Emitters for changing PC/RIP/EIP/IP with a relative call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).    *
1535*********************************************************************************************************************************/
1536
1537/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1538 * them below the stack emitters, but then they would not be close to the rest of the PC/RIP handling...). */
1539DECL_FORCE_INLINE_THROW(uint32_t)
1540iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1541{
1542 /* Use16BitSp: */
1543#ifdef RT_ARCH_AMD64
1544 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1545 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1546#else
1547 /* sub regeff, regrsp, #cbMem */
1548 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1549 /* and regeff, regeff, #0xffff */
1550 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1551 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1552 /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 from idxRegEffSp into bits 15:0 of idxRegRsp. */
1553 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1554#endif
1555 return off;
1556}
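/* Worked example (hypothetical values, for illustration only): pushing a word
   with SP=0x0000 makes the code above compute 0x0000 - 2 = 0xFFFE in the low 16
   bits of idxRegRsp only, while idxRegEffSp ends up holding 0x0000FFFE as the
   zero-extended offset within SS used for the store. */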
1557
1558
1559DECL_FORCE_INLINE(uint32_t)
1560iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1561{
1562 /* Use32BitSp: */
1563 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1564 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1565 return off;
1566}
1567
1568
1569template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
1570DECL_INLINE_THROW(uint32_t)
1571iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1572 uintptr_t pfnFunction, uint8_t idxInstr)
1573{
1574 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
1575 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
1576
1577 /*
1578 * Assert sanity.
1579 */
1580#ifdef VBOX_STRICT
1581 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1582 {
1583 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1584 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1585 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1586 Assert( pfnFunction
1587 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1588 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1589 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1590 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1591 : UINT64_C(0xc000b000a0009000) ));
1592 }
1593 else
1594 Assert( pfnFunction
1595 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1596 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1597 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1598 : UINT64_C(0xc000b000a0009000) ));
1599#endif
1600
1601#ifdef VBOX_STRICT
1602 /*
1603 * Check that the fExec flags we've got make sense.
1604 */
1605 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1606#endif
1607
1608 /*
1609 * To keep things simple we have to commit any pending writes first as we
1610 * may end up making calls.
1611 */
1612 /** @todo we could postpone this till we make the call and reload the
1613 * registers after returning from the call. Not sure if that's sensible or
1614 * not, though. */
1615 off = iemNativeRegFlushPendingWrites(pReNative, off);
1616
1617 /*
1618 * First we calculate the new RSP and the effective stack pointer value.
1619 * For 64-bit mode and flat 32-bit these two are the same.
1620 * (Code structure is very similar to that of PUSH)
1621 */
1622 RT_CONSTEXPR
1623 uint8_t const cbMem = a_cBitsVar / 8;
1624 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1625 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1626 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1627 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1628 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1629 {
1630 Assert(idxRegEffSp == idxRegRsp);
1631 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
1632 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1633 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
1634 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1635 else
1636 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1637 }
1638 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1639 {
1640 Assert(idxRegEffSp != idxRegRsp);
1641 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1642 kIemNativeGstRegUse_ReadOnly);
1643#ifdef RT_ARCH_AMD64
1644 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1645#else
1646 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1647#endif
1648 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1649 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1650 offFixupJumpToUseOtherBitSp = off;
1651 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1652 {
1653 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1654 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1655 }
1656 else
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1659 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1662 }
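    /* Explanatory note (not from the original source): the straight-line code
       above handles the SP width matching the current CPU mode; the opposite
       SS.ATTR.D case is emitted out of line in the block below the TLB-lookup
       jump and branches back to the SpUpdateEnd point right after this. */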
1663 /* SpUpdateEnd: */
1664 uint32_t const offLabelSpUpdateEnd = off;
1665
1666 /*
1667 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1668 * we're skipping lookup).
1669 */
1670 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1671 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1672 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1673 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1674 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1675 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1676 : UINT32_MAX;
1677 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1678
1679
1680 if (!TlbState.fSkip)
1681 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1682 else
1683 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1684
1685 /*
1686 * Use16BitSp:
1687 */
1688 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
1689 {
1690#ifdef RT_ARCH_AMD64
1691 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1692#else
1693 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1694#endif
1695 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1696 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1697 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1698 else
1699 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1700 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1701 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1702 }
1703
1704 /*
1705 * TlbMiss:
1706 *
1707 * Call helper to do the pushing.
1708 */
1709 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1710
1711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1713#else
1714 RT_NOREF(idxInstr);
1715#endif
1716
1717 /* Save variables in volatile registers. */
1718 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1719 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1720 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1721 | (RT_BIT_32(idxRegPc));
1722 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1723
1724 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1725 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1726 {
1727 /* Swap them using ARG0 as temp register: */
1728 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1729 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1730 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1731 }
1732 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1733 {
1734 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1736
1737 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1738 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1739 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1740 }
1741 else
1742 {
1743 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745
1746 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1748 }
1749
1750#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1751 /* Do delayed EFLAGS calculations. */
1752 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1753 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1754#endif
1755
1756 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1757 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1758
1759 /* Done setting up parameters, make the call. */
1760 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1761
1762 /* Restore variables and guest shadow registers to volatile registers. */
1763 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1764 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1765
1766#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1767 if (!TlbState.fSkip)
1768 {
1769 /* end of TlbMiss - Jump to the done label. */
1770 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1771 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1772
1773 /*
1774 * TlbLookup:
1775 */
1776 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
1777 idxLabelTlbLookup, idxLabelTlbMiss,
1778 idxRegMemResult);
1779
1780 /*
1781 * Emit code to do the actual storing / fetching.
1782 */
1783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1784# ifdef IEM_WITH_TLB_STATISTICS
1785 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1786 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1787# endif
1788 AssertCompile(cbMem == 2 || cbMem == 4 || cbMem == 8);
1789 if RT_CONSTEXPR_IF(cbMem == 2)
1790 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1791 else if RT_CONSTEXPR_IF(cbMem == 4)
1792 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1793 else
1794 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1795
1796 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1797 TlbState.freeRegsAndReleaseVars(pReNative);
1798
1799 /*
1800 * TlbDone:
1801 *
1802 * Commit the new RSP value.
1803 */
1804 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1805 }
1806#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1807
1808#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1809 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
1810#endif
1811 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1812 if (idxRegEffSp != idxRegRsp)
1813 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1814
1815 return off;
1816}
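/* Usage note (explanatory, not from the original source): the callers below
   instantiate this as iemNativeEmitStackPushRip<16, 0>, <32, 0> or <64, 64>,
   i.e. a_cBitsVar is the width of the pushed return address and a_cBitsFlat is
   non-zero only when the stack can be treated as flat (only the 64-bit case
   among the callers here), in which case RSP is used directly as the effective
   stack pointer. */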
1817
1818
1819/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1820#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1821 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1822
1823/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1824 * clears flags. */
1825#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1826 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1827 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1828
1829/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1830#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1831 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1832
1833/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1834 * clears flags. */
1835#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1836 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1837 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1838
1839#undef IEM_MC_IND_CALL_U16_AND_FINISH
1840
1841
1842/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1843#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1844 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1845
1846/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1847 * clears flags. */
1848#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1849 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1850 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1851
1852#undef IEM_MC_IND_CALL_U32_AND_FINISH
1853
1854
1855/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1856 * an extra parameter, for use in 64-bit code. */
1857#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1858 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1859
1860
1861/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1862 * an extra parameter, for use in 64-bit code and we need to check and clear
1863 * flags. */
1864#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1865 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1866 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1867
1868#undef IEM_MC_IND_CALL_U64_AND_FINISH
1869
1870/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1871 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1872DECL_INLINE_THROW(uint32_t)
1873iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1874 uint8_t idxInstr, uint8_t cbVar)
1875{
1876 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1877 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1878
1879 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1880 off = iemNativeRegFlushPendingWrites(pReNative, off);
1881
1882#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1883 Assert(pReNative->Core.offPc == 0);
1884 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1885#endif
1886
1887 /* Get a register with the new PC loaded from idxVarPc.
1888 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1889 uint8_t const idxPcRegNew = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1890
1891 /* Check limit (may #GP(0) + exit TB). */
1892 if (!f64Bit)
1893/** @todo we can skip this test in FLAT 32-bit mode. */
1894 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1895 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1896 else if (cbVar > sizeof(uint32_t))
1897 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1898
1899#if 1
1900 /* Allocate a temporary PC register, we don't want it shadowed. */
1901 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1902 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1903#else
1904 /* Allocate a temporary PC register. */
1905 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1906 true /*fNoVolatileRegs*/);
1907#endif
1908
1909 /* Perform the addition and push the variable to the guest stack. */
1910 /** @todo Flat variants for PC32 variants. */
1911 switch (cbVar)
1912 {
1913 case sizeof(uint16_t):
1914 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1915 /* Truncate the result to 16-bit IP. */
1916 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1917 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1918 break;
1919 case sizeof(uint32_t):
1920 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1921 /** @todo In FLAT mode we can use the flat variant. */
1922 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1923 break;
1924 case sizeof(uint64_t):
1925 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1926 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1927 break;
1928 default:
1929 AssertFailed();
1930 }
1931
1932 /* RSP got changed, so do this again. */
1933 off = iemNativeRegFlushPendingWrites(pReNative, off);
1934
1935 /* Store the result. */
1936 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
1937#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1938 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1939 pReNative->Core.fDebugPcInitialized = true;
1940 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1941#endif
1942
1943#if 1
1944 /* Need to transfer the shadow information to the new RIP register. */
1945 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1946#else
1947 /* Sync the new PC. */
1948 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxPcRegNew);
1949#endif
1950 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1951 iemNativeRegFreeTmp(pReNative, idxPcReg);
1952 /** @todo implicitly free the variable? */
1953
1954 return off;
1955}
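/* Usage note (explanatory, not from the original source): the
   IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64 variant above, for instance,
   reaches this with f64Bit=true and cbVar=sizeof(uint64_t), so only the
   canonical check applies and the return address is pushed via the flat 64-bit
   stack helper. */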
1956
1957
1958/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1959 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1960#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1961 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1962
1963/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1964 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1965 * flags. */
1966#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1967 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1968 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1969
1970/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1971 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1972#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1973 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1974
1975/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1976 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1977 * flags. */
1978#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1979 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1980 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1981
1982/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1983 * an extra parameter, for use in 64-bit code. */
1984#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1985 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1986
1987/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1988 * an extra parameter, for use in 64-bit code and we need to check and clear
1989 * flags. */
1990#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1991 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1992 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1993
1994#undef IEM_MC_REL_CALL_S16_AND_FINISH
1995
1996/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1997 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1998DECL_INLINE_THROW(uint32_t)
1999iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2000 uint8_t idxInstr)
2001{
2002 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2003 off = iemNativeRegFlushPendingWrites(pReNative, off);
2004
2005#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2006 Assert(pReNative->Core.offPc == 0);
2007 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2008#endif
2009
2010 /* Allocate a temporary PC register. */
2011 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2012 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2013 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2014
2015 /* Calculate the new RIP. */
2016 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2017 /* Truncate the result to 16-bit IP. */
2018 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2019 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2020 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2021
2022 /* Truncate the result to 16-bit IP. */
2023 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2024
2025 /* Check limit (may #GP(0) + exit TB). */
2026 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2027
2028 /* Push the return address onto the guest stack. */
2029 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2030
2031 /* RSP got changed, so flush again. */
2032 off = iemNativeRegFlushPendingWrites(pReNative, off);
2033
2034 /* Store the result. */
2035 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2036#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2037 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2038 pReNative->Core.fDebugPcInitialized = true;
2039 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2040#endif
2041
2042 /* Need to transfer the shadow information to the new RIP register. */
2043 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2044 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2045 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2046
2047 return off;
2048}
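/* Worked example (hypothetical values, for illustration only): for a 16-bit
   'call rel16' with IP=0xFFFE, cbInstr=3 and offDisp=+0x10, the return address
   becomes (0xFFFE + 3) & 0xFFFF = 0x0001 and the target becomes
   (0x0001 + 0x10) & 0xFFFF = 0x0011; the target is checked against CS.LIM before
   the return address is pushed. */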
2049
2050
2051/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2052 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2053#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2054 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2055
2056/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2057 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2058 * flags. */
2059#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2060 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2061 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2062
2063#undef IEM_MC_REL_CALL_S32_AND_FINISH
2064
2065/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2066 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2067DECL_INLINE_THROW(uint32_t)
2068iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2069 uint8_t idxInstr)
2070{
2071 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2072 off = iemNativeRegFlushPendingWrites(pReNative, off);
2073
2074#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2075 Assert(pReNative->Core.offPc == 0);
2076 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2077#endif
2078
2079 /* Allocate a temporary PC register. */
2080 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2081 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2082 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2083
2084 /* Update the EIP to get the return address. */
2085 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2086
2087 /* Load the address, add the displacement and check that it is within the CS segment limit, raising #GP(0) + exit TB if it isn't. */
2088 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2089 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2090 /** @todo we can skip this test in FLAT 32-bit mode. */
2091 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2092
2093 /* Push the return address onto the guest stack. */
2094 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2095 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2096
2097 /* RSP got changed, so do this again. */
2098 off = iemNativeRegFlushPendingWrites(pReNative, off);
2099
2100 /* Store the result. */
2101 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2102#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2103 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2104 pReNative->Core.fDebugPcInitialized = true;
2105 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2106#endif
2107
2108 /* Need to transfer the shadow information to the new RIP register. */
2109 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2110 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2111 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2112
2113 return off;
2114}
2115
2116
2117/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2118 * an extra parameter, for use in 64-bit code. */
2119#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2120 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2121
2122/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2123 * an extra parameter, for use in 64-bit code and we need to check and clear
2124 * flags. */
2125#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2126 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2127 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2128
2129#undef IEM_MC_REL_CALL_S64_AND_FINISH
2130
2131/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2132 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2133DECL_INLINE_THROW(uint32_t)
2134iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2135 uint8_t idxInstr)
2136{
2137 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2138 off = iemNativeRegFlushPendingWrites(pReNative, off);
2139
2140#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2141 Assert(pReNative->Core.offPc == 0);
2142 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2143#endif
2144
2145 /* Allocate a temporary PC register. */
2146 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2147 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2148 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2149
2150 /* Update the RIP to get the return address. */
2151 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2152
2153 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2154 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2155 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2156 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2157
2158 /* Push the return address onto the guest stack. */
2159 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2160
2161 /* RSP got changed, so do this again. */
2162 off = iemNativeRegFlushPendingWrites(pReNative, off);
2163
2164 /* Store the result. */
2165 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2166#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2167 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2168 pReNative->Core.fDebugPcInitialized = true;
2169 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2170#endif
2171
2172 /* Need to transfer the shadow information to the new RIP register. */
2173 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2174 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2175 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2176
2177 return off;
2178}
2179
2180
2181/*********************************************************************************************************************************
2182* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).         *
2183*********************************************************************************************************************************/
2184
2185DECL_FORCE_INLINE_THROW(uint32_t)
2186iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2187 uint16_t cbPopAdd, uint8_t idxRegTmp)
2188{
2189 /* Use16BitSp: */
2190#ifdef RT_ARCH_AMD64
2191 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2192 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2193 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2194 RT_NOREF(idxRegTmp);
2195
2196#elif defined(RT_ARCH_ARM64)
2197 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2198 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2199 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2200 uint16_t const cbCombined = cbMem + cbPopAdd;
2201 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2202 if (cbCombined >= RT_BIT_32(12))
2203 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2204 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2205 /* and tmp, tmp, #0xffff */
2206 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2207 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2208 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2209 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2210
2211#else
2212# error "Port me"
2213#endif
2214 return off;
2215}
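/* Worked example (hypothetical values, for illustration only): for a 16-bit
   'retn 4' with SP=0xFFFE the return address is read from SS:0xFFFE (cbMem=2)
   and the new SP becomes (0xFFFE + 2 + 4) & 0xFFFF = 0x0004, i.e. the addition
   wraps within the low 16 bits while the upper RSP bits are not part of the
   16-bit arithmetic. */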
2216
2217
2218DECL_FORCE_INLINE_THROW(uint32_t)
2219iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2220 uint16_t cbPopAdd)
2221{
2222 /* Use32BitSp: */
2223 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2224 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2225 return off;
2226}
2227
2228
2229/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2230#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr) \
2231 off = iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2232
2233/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2234#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2235 Assert((a_enmEffOpSize) == IEMMODE_32BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2236 off = (a_enmEffOpSize) == IEMMODE_32BIT \
2237 ? iemNativeEmitRetn<IEMMODE_32BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2238 : iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2239
2240/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2241#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2242 Assert((a_enmEffOpSize) == IEMMODE_64BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2243 off = (a_enmEffOpSize) == IEMMODE_64BIT \
2244 ? iemNativeEmitRetn<IEMMODE_64BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2245 : iemNativeEmitRetn<IEMMODE_16BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2246
2247/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2248 * clears flags. */
2249#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbPopArgs, a_cbInstr) \
2250 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr); \
2251 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2252
2253/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2254 * clears flags. */
2255#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2256 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2257 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2258
2259/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2260 * clears flags. */
2261#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2262 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2263 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2264
2265/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2266template<IEMMODE const a_enmEffOpSize, bool const a_f64Bit>
2267DECL_INLINE_THROW(uint32_t)
2268iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPopArgs, uint8_t idxInstr)
2269{
2270 RT_NOREF(cbInstr);
2271 AssertCompile(a_enmEffOpSize == IEMMODE_64BIT || a_enmEffOpSize == IEMMODE_32BIT || a_enmEffOpSize == IEMMODE_16BIT);
2272
2273#ifdef VBOX_STRICT
2274 /*
2275 * Check that the fExec flags we've got make sense.
2276 */
2277 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2278#endif
2279
2280 /*
2281 * To keep things simple we have to commit any pending writes first as we
2282 * may end up making calls.
2283 */
2284 off = iemNativeRegFlushPendingWrites(pReNative, off);
2285
2286 /*
2287 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
2288 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2289 * directly as the effective stack pointer.
2290 *
2291 * (Code structure is very similar to that of PUSH)
2292 *
2293 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2294 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2295 * aren't commonly used (or useful) and thus not in need of optimizing.
2296 *
2297 * Note! For non-flat modes the guest RSP is not allocated for update but
2298 * rather for calculation as the shadowed register would remain modified
2299 * even if the return address throws a #GP(0) due to being outside the
2300 * CS limit causing a wrong stack pointer value in the guest (see the
2301 * near return testcase in bs3-cpu-basic-2). If no exception is thrown
2302 * the shadowing is transferred to the new register returned by
2303 * iemNativeRegAllocTmpForGuestReg() at the end.
2304 */
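    /* Illustrative note (not from the original source): a plain 'ret' in 64-bit
       mode resolves below to cbMem=8 and iemNativeHlpStackFlatFetchU64, whereas
       an 'o16 ret' in flat 32-bit mode resolves to cbMem=2 and the non-flat
       iemNativeHlpStackFetchU16 path, per the simplification note above. */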
2305 RT_CONSTEXPR
2306 uint8_t const cbMem = a_enmEffOpSize == IEMMODE_64BIT
2307 ? sizeof(uint64_t)
2308 : a_enmEffOpSize == IEMMODE_32BIT
2309 ? sizeof(uint32_t)
2310 : sizeof(uint16_t);
2311/** @todo the basic flatness could be detected by the threaded compiler step
2312 * like for the other macros... worth it? */
2313 bool const fFlat = a_enmEffOpSize == IEMMODE_64BIT
2314 || (a_enmEffOpSize == IEMMODE_32BIT /* see note */ && IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
2315 uintptr_t const pfnFunction = a_enmEffOpSize == IEMMODE_64BIT
2316 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2317 : fFlat
2318 ? (uintptr_t)iemNativeHlpStackFlatFetchU32
2319 : a_enmEffOpSize == IEMMODE_32BIT
2320 ? (uintptr_t)iemNativeHlpStackFetchU32
2321 : (uintptr_t)iemNativeHlpStackFetchU16;
2322 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2323 fFlat ? kIemNativeGstRegUse_ForUpdate
2324 : kIemNativeGstRegUse_Calculation,
2325 true /*fNoVolatileRegs*/);
2326 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2327 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2328 * will be the resulting register value. */
2329 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2330
2331 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2332 if (fFlat)
2333 Assert(idxRegEffSp == idxRegRsp);
2334 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2335 {
2336 Assert(idxRegEffSp != idxRegRsp);
2337 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2338 kIemNativeGstRegUse_ReadOnly);
2339#ifdef RT_ARCH_AMD64
2340 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2341#else
2342 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2343#endif
2344 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2345 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2346 offFixupJumpToUseOtherBitSp = off;
2347 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_32BIT)
2348 {
2349 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2350 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2351 }
2352 else
2353 {
2354 Assert(a_enmEffOpSize == IEMMODE_16BIT);
2355 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2356 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2357 idxRegMemResult);
2358 }
2359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2360 }
2361 /* SpUpdateEnd: */
2362 uint32_t const offLabelSpUpdateEnd = off;
2363
2364 /*
2365 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2366 * we're skipping lookup).
2367 */
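    /* Rough sketch of the layout emitted below (informal):
            jmp   TlbLookup                 ; or TlbMiss when the lookup is skipped
        UseOtherBitSp:                      ; non-flat modes only
            <SP update using the other stack pointer width>
            jmp   SpUpdateEnd               ; back up to just above
        TlbMiss:
            <save volatiles, call pfnFunction, result -> idxRegMemResult>
            jmp   TlbDone
        TlbLookup:
            <inline TLB lookup, load the return address into idxRegMemResult>
        TlbDone:
            <limit/canonical check, flat RSP update, commit RSP and RIP> */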
2368 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2369 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2370 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2371 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2372 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2373 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2374 : UINT32_MAX;
2375
2376 if (!TlbState.fSkip)
2377 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2378 else
2379 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2380
2381 /*
2382 * UseOtherBitSp:
2383 */
2384 if (!fFlat)
2385 {
2386#ifdef RT_ARCH_AMD64
2387 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2388#else
2389 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2390#endif
2391 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2392 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2393 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2394 idxRegMemResult);
2395 else
2396 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2397 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2399 }
2400
2401 /*
2402 * TlbMiss:
2403 *
2404 * Call helper to do the stack fetch (pop) of the return address.
2405 */
2406 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2407
2408#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2409 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2410#else
2411 RT_NOREF(idxInstr);
2412#endif
2413
2414 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2415 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2416 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2417 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2418
2419
2420 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2421 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2422 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2423
2424#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2425 /* Do delayed EFLAGS calculations. */
2426 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2427#endif
2428
2429 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2430 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2431
2432 /* Done setting up parameters, make the call. */
2433 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2434
2435 /* Move the return register content to idxRegMemResult. */
2436 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2437 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2438
2439 /* Restore variables and guest shadow registers to volatile registers. */
2440 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2441 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2442
2443#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2444 if (!TlbState.fSkip)
2445 {
2446 /* end of TlbMiss - Jump to the done label. */
2447 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2448 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2449
2450 /*
2451 * TlbLookup:
2452 */
2453 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
2454 idxLabelTlbLookup, idxLabelTlbMiss,
2455 idxRegMemResult);
2456
2457 /*
2458 * Emit code to load the value (dereferencing the address in idxRegMemResult into idxRegMemResult itself).
2459 */
2460 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2461# ifdef IEM_WITH_TLB_STATISTICS
2462 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2463 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2464# endif
2465 switch (cbMem)
2466 {
2467 case 2:
2468 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2469 break;
2470 case 4:
2471 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2472 break;
2473 case 8:
2474 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2475 break;
2476 default:
2477 AssertFailed();
2478 }
2479
2480 TlbState.freeRegsAndReleaseVars(pReNative);
2481
2482 /*
2483 * TlbDone:
2484 *
2485 * Set the new RSP value (FLAT accesses need to calculate it first) and
2486 * commit the popped register value.
2487 */
2488 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2489 }
2490#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2491
2492 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2493 if RT_CONSTEXPR_IF(!a_f64Bit)
2494/** @todo we can skip this test in FLAT 32-bit mode. */
2495 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2496 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2497 else if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2498 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2499
2500 /* Complete RSP calculation for FLAT mode. */
2501 if (idxRegEffSp == idxRegRsp)
2502 {
2503 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2504 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPopArgs);
2505 else
2506 {
2507 Assert(a_enmEffOpSize == IEMMODE_32BIT);
2508 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPopArgs);
2509 }
2510 }
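    /* Example (informal): for a 64-bit 'ret 16' the flat path above advances RSP by
       sizeof(uint64_t) + 16 = 24 bytes, i.e. the return address plus the popped
       argument bytes. */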
2511
2512 /* Commit the result and clear any current guest shadows for RIP. */
2513 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
2514 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>( pReNative, off, idxRegMemResult);
2515 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2516#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2518 pReNative->Core.fDebugPcInitialized = true;
2519 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2520#endif
2521
2522 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2523 if (!fFlat)
2524 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2525
2526 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2527 if (idxRegEffSp != idxRegRsp)
2528 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2529 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2530 return off;
2531}
2532
2533
2534/*********************************************************************************************************************************
2535* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2536*********************************************************************************************************************************/
2537
2538#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2539 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2540
2541/**
2542 * Emits code to check if a \#NM exception should be raised.
2543 *
2544 * @returns New code buffer offset, UINT32_MAX on failure.
2545 * @param pReNative The native recompile state.
2546 * @param off The code buffer offset.
2547 * @param idxInstr The current instruction.
2548 */
2549DECL_INLINE_THROW(uint32_t)
2550iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2551{
2552 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2553
2554 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2555 {
2556 /*
2557 * Make sure we don't have any outstanding guest register writes as we may
2558 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2559 */
2560 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2561 off = iemNativeRegFlushPendingWrites(pReNative, off);
2562
2563#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2564 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2565#else
2566 RT_NOREF(idxInstr);
2567#endif
2568
2569 /* Allocate a temporary CR0 register. */
2570 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2571 kIemNativeGstRegUse_ReadOnly);
2572
2573 /*
2574 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2575 * return raisexcpt();
2576 */
2577 /* Test and jump. */
2578 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2579 X86_CR0_EM | X86_CR0_TS);
2580
2581 /* Free but don't flush the CR0 register. */
2582 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2583
2584 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2585 }
2586 else
2587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2588
2589 return off;
2590}
2591
2592
2593#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2594 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2595
2596/**
2597 * Emits code to check if a \#NM exception should be raised for WAIT/FWAIT (CR0.MP and CR0.TS both set).
2598 *
2599 * @returns New code buffer offset, UINT32_MAX on failure.
2600 * @param pReNative The native recompile state.
2601 * @param off The code buffer offset.
2602 * @param idxInstr The current instruction.
2603 */
2604DECL_INLINE_THROW(uint32_t)
2605iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2606{
2607 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2608
2609 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2610 {
2611 /*
2612 * Make sure we don't have any outstanding guest register writes as we may
2613 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2614 */
2615 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2616 off = iemNativeRegFlushPendingWrites(pReNative, off);
2617
2618#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2619 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2620#else
2621 RT_NOREF(idxInstr);
2622#endif
2623
2624 /* Allocate a temporary CR0 register. */
2625 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2626 kIemNativeGstRegUse_Calculation);
2627
2628 /*
2629 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2630 * return raisexcpt();
2631 */
2632 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2633 /* Test and jump. */
2634 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2635
2636 /* Free the CR0 register. */
2637 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2638
2639 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2640 }
2641 else
2642 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2643
2644 return off;
2645}
2646
2647
2648#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2649 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2650
2651/**
2652 * Emits code to check if a \#MF exception should be raised.
2653 *
2654 * @returns New code buffer offset, UINT32_MAX on failure.
2655 * @param pReNative The native recompile state.
2656 * @param off The code buffer offset.
2657 * @param idxInstr The current instruction.
2658 */
2659DECL_INLINE_THROW(uint32_t)
2660iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2661{
2662 /*
2663 * Make sure we don't have any outstanding guest register writes as we may
2664 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2665 */
2666 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2667 off = iemNativeRegFlushPendingWrites(pReNative, off);
2668
2669#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2670 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2671#else
2672 RT_NOREF(idxInstr);
2673#endif
2674
2675 /* Allocate a temporary FSW register. */
2676 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2677 kIemNativeGstRegUse_ReadOnly);
2678
2679 /*
2680 * if ((FSW & X86_FSW_ES) != 0)
2681 * return raisexcpt();
2682 */
2683 /* Test and jump. */
2684 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2685
2686 /* Free but don't flush the FSW register. */
2687 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2688
2689 return off;
2690}
2691
2692
2693#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2694 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2695
2696/**
2697 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2698 *
2699 * @returns New code buffer offset, UINT32_MAX on failure.
2700 * @param pReNative The native recompile state.
2701 * @param off The code buffer offset.
2702 * @param idxInstr The current instruction.
2703 */
2704DECL_INLINE_THROW(uint32_t)
2705iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2706{
2707 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2708
2709 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2710 {
2711 /*
2712 * Make sure we don't have any outstanding guest register writes as we may
2713 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2714 */
2715 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2716 off = iemNativeRegFlushPendingWrites(pReNative, off);
2717
2718#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2719 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2720#else
2721 RT_NOREF(idxInstr);
2722#endif
2723
2724 /* Allocate a temporary CR0 and CR4 register. */
2725 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2726 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2727 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2728
2729 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2730#ifdef RT_ARCH_AMD64
2731 /*
2732 * We do a modified test here:
2733 * if (!((((cr4 & X86_CR4_OSFXSR) | cr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)) ^ X86_CR4_OSFXSR)) { likely }
2734 * else { goto RaiseSseRelated; }
2735 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2736 * all targets except the 386, which doesn't support SSE anyway, so this
2737 * should be a safe assumption.
2738 */
2739 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2740 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2741 //pCodeBuf[off++] = 0xcc;
2742 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2743 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2744 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2745 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2746 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2747 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2748
2749#elif defined(RT_ARCH_ARM64)
2750 /*
2751 * We do a modified test here:
2752 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2753 * else { goto RaiseSseRelated; }
2754 */
2755 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2756 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2757 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2758 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2759 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2760 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2761 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2762 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2763 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2764 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2765 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2766 idxTmpReg, false /*f64Bit*/);
2767
2768#else
2769# error "Port me!"
2770#endif
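        /* Sanity note (informal): in the no-exception case (CR4.OSFXSR=1, CR0.EM=0,
           CR0.TS=0) idxTmpReg ends up zero on both host architectures above, so the
           RaiseSseRelated exit is not taken. */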
2771
2772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2773 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2774 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2775 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2776
2777 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2778 }
2779 else
2780 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2781
2782 return off;
2783}
2784
2785
2786#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2787 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2788
2789/**
2790 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2791 *
2792 * @returns New code buffer offset, UINT32_MAX on failure.
2793 * @param pReNative The native recompile state.
2794 * @param off The code buffer offset.
2795 * @param idxInstr The current instruction.
2796 */
2797DECL_INLINE_THROW(uint32_t)
2798iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2799{
2800 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2801
2802 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2803 {
2804 /*
2805 * Make sure we don't have any outstanding guest register writes as we may
2806 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2807 */
2808 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2809 off = iemNativeRegFlushPendingWrites(pReNative, off);
2810
2811#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2812 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2813#else
2814 RT_NOREF(idxInstr);
2815#endif
2816
2817 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2818 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2819 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2820 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2821 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2822
2823 /*
2824 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2825 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2826 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2827 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2828 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2829 * { likely }
2830 * else { goto RaiseAvxRelated; }
2831 */
2832#ifdef RT_ARCH_AMD64
2833 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2834 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2835 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2836 ^ 0x1a) ) { likely }
2837 else { goto RaiseAvxRelated; } */
2838 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2839 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2840 //pCodeBuf[off++] = 0xcc;
2841 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2842 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2843 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2844 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2845 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2846 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2847 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2848 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2849 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2850 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2851 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2852
2853#elif defined(RT_ARCH_ARM64)
2854 /* if (!( ((((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2855 | ((cr0 >> X86_CR0_TS_BIT) & 1) )) { likely }
2856 else { goto RaiseAvxRelated; } */
2857 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2858 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2859 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2860 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2861 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2862 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2863 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2864 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2865 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2866 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2867 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2868 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2869 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2870 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2871 idxTmpReg, false /*f64Bit*/);
2872
2873#else
2874# error "Port me!"
2875#endif
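        /* Sanity note (informal): with XCR0.SSE, XCR0.YMM and CR4.OSXSAVE all set and
           CR0.TS clear, idxTmpReg computes to zero on both host paths above and the
           RaiseAvxRelated exit is skipped. */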
2876
2877 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2878 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2879 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2880 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2881
2882 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2883 }
2884 else
2885 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2886
2887 return off;
2888}
2889
2890
2891#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2892 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2893
2894/**
2895 * Emits code to raise a \#DE if a local variable is zero.
2896 *
2897 * @returns New code buffer offset, UINT32_MAX on failure.
2898 * @param pReNative The native recompile state.
2899 * @param off The code buffer offset.
2900 * @param idxVar The variable to check. This must be 32-bit.
2901 * @param idxInstr The current instruction.
2902 */
2903DECL_INLINE_THROW(uint32_t)
2904iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2905{
2906 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2907 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2908
2909 /* Make sure we don't have any outstanding guest register writes as we may raise a #DE. */
2910 off = iemNativeRegFlushPendingWrites(pReNative, off);
2911
2912 /* Set the instruction number if we're counting. */
2913#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2914 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2915#else
2916 RT_NOREF(idxInstr);
2917#endif
2918
2919 /* Do the job we're here for. */
2920 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2921 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2922 iemNativeVarRegisterRelease(pReNative, idxVar);
2923
2924 return off;
2925}
2926
2927
2928#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2929 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2930
2931/**
2932 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2933 *
2934 * @returns New code buffer offset, UINT32_MAX on failure.
2935 * @param pReNative The native recompile state.
2936 * @param off The code buffer offset.
2937 * @param idxInstr The current instruction.
2938 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2939 * @param cbAlign The alignment in bytes to check against.
2940 */
2941DECL_INLINE_THROW(uint32_t)
2942iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2943 uint8_t idxVarEffAddr, uint8_t cbAlign)
2944{
2945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2946 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2947
2948 /*
2949 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2950 */
2951 off = iemNativeRegFlushPendingWrites(pReNative, off);
2952
2953#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2954 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2955#else
2956 RT_NOREF(idxInstr);
2957#endif
2958
2959 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2960 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
2961 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2962
2963 return off;
2964}
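/* Example (informal): for a 16-byte alignment check (cbAlign=16) the test above
   exits to RaiseGp0 whenever any of the low four bits of the effective address are
   set. */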
2965
2966
2967/*********************************************************************************************************************************
2968* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2969*********************************************************************************************************************************/
2970
2971/**
2972 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2973 *
2974 * @returns Pointer to the condition stack entry; longjmps with
2975 * VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
2976 */
2977DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2978{
2979 uint32_t const idxStack = pReNative->cCondDepth;
2980 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2981
2982 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2983 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2984
2985 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2986 pEntry->fInElse = false;
2987 pEntry->fIfExitTb = false;
2988 pEntry->fElseExitTb = false;
2989 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2990 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2991
2992 return pEntry;
2993}
2994
2995
2996/**
2997 * Start of the if-block, snapshotting the register and variable state.
2998 */
2999DECL_INLINE_THROW(void)
3000iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3001{
3002 Assert(offIfBlock != UINT32_MAX);
3003 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3004 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3005 Assert(!pEntry->fInElse);
3006
3007 /* Define the start of the IF block if requested or for disassembly purposes. */
3008 if (idxLabelIf != UINT32_MAX)
3009 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3010#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3011 else
3012 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3013#else
3014 RT_NOREF(offIfBlock);
3015#endif
3016
3017 /* Copy the initial state so we can restore it in the 'else' block. */
3018 pEntry->InitialState = pReNative->Core;
3019}
3020
3021
3022#define IEM_MC_ELSE() } while (0); \
3023 off = iemNativeEmitElse(pReNative, off); \
3024 do {
3025
3026/** Emits code related to IEM_MC_ELSE. */
3027DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3028{
3029 /* Check sanity and get the conditional stack entry. */
3030 Assert(off != UINT32_MAX);
3031 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3032 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3033 Assert(!pEntry->fInElse);
3034
3035 /* We can skip the dirty register flushing and the jump to the endif label
3036 if the if-branch already jumped to a TB exit. */
3037 if (!pEntry->fIfExitTb)
3038 {
3039#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3040 /* Writeback any dirty shadow registers. */
3041 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3042 * in one of the branches and leave guest registers already dirty before the start of the if
3043 * block alone. */
3044 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3045#endif
3046
3047 /* Jump to the endif. */
3048 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3049 }
3050# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3051 else
3052 Assert(pReNative->Core.offPc == 0);
3053# endif
3054
3055 /* Define the else label and enter the else part of the condition. */
3056 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3057 pEntry->fInElse = true;
3058
3059 /* Snapshot the core state so we can do a merge at the endif and restore
3060 the snapshot we took at the start of the if-block. */
3061 pEntry->IfFinalState = pReNative->Core;
3062 pReNative->Core = pEntry->InitialState;
3063
3064 return off;
3065}
3066
3067
3068#define IEM_MC_ENDIF() } while (0); \
3069 off = iemNativeEmitEndIf(pReNative, off)
3070
3071/** Emits code related to IEM_MC_ENDIF. */
3072DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3073{
3074 /* Check sanity and get the conditional stack entry. */
3075 Assert(off != UINT32_MAX);
3076 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3077 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3078
3079#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3080 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3081#endif
3082
3083 /*
3084 * If either of the branches exited the TB, we can take the state from the
3085 * other branch and skip all the merging headache.
3086 */
3087 bool fDefinedLabels = false;
3088 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3089 {
3090#ifdef VBOX_STRICT
3091 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3092 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3093 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3094 ? &pEntry->IfFinalState : &pReNative->Core;
3095# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3096 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3097# endif
3098# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3099 Assert(pExitCoreState->offPc == 0);
3100# endif
3101 RT_NOREF(pExitCoreState);
3102#endif
3103
3104 if (!pEntry->fIfExitTb)
3105 {
3106 Assert(pEntry->fInElse);
3107 pReNative->Core = pEntry->IfFinalState;
3108 }
3109 }
3110 else
3111 {
3112 /*
3113 * Now we have to find common ground with the core state at the end of the
3114 * other branch. Use the smallest common denominator and just drop anything
3115 * that isn't the same in both states.
3116 */
3117 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3118 * which is why we're doing this at the end of the else-block.
3119 * But we'd need more info about the future for that to be worth the effort. */
3120 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3121#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3122 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3123 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3124 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3125#endif
3126
3127 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3128 {
3129#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3130 /*
3131 * If the two branches differ in which shadow registers are dirty, we flush
3132 * the ones that are only dirty in the current branch here and emit separate
3133 * tail code (below) for the ones that are only dirty in the other branch.
3134 */
3135 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3136 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3137 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3138 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3139 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3140 if (!fGstRegDirtyDiff)
3141 { /* likely */ }
3142 else
3143 {
3144 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3145 if (fGstRegDirtyHead)
3146 {
3147 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3148 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3149 }
3150 }
3151#endif
3152
3153 /*
3154 * Shadowed guest registers.
3155 *
3156 * We drop any shadows where the two states disagree about where
3157 * things are kept. We may end up flushing more dirty registers
3158 * here if the two branches keep things in different registers.
3159 */
3160 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3161 if (fGstRegs)
3162 {
3163 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3164 do
3165 {
3166 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3167 fGstRegs &= ~RT_BIT_64(idxGstReg);
3168
3169 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3170 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3171 if ( idxCurHstReg != idxOtherHstReg
3172 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3173 {
3174#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3175 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3176 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3177 idxOtherHstReg, pOther->bmGstRegShadows));
3178#else
3179 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3180 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3181 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3182 idxOtherHstReg, pOther->bmGstRegShadows,
3183 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3184 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3185 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3186 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3187 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3188#endif
3189 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3190 }
3191 } while (fGstRegs);
3192 }
3193 else
3194 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3195
3196#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3197 /*
3198 * Generate jumpy code for flushing dirty registers from the other
3199 * branch that aren't dirty in the current one.
3200 */
3201 if (!fGstRegDirtyTail)
3202 { /* likely */ }
3203 else
3204 {
3205 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3206 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3207
3208 /* First the current branch has to jump over the dirty flushing from the other branch. */
3209 uint32_t const offFixup1 = off;
3210 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3211
3212 /* Put the endif and maybe else label here so the other branch ends up here. */
3213 if (!pEntry->fInElse)
3214 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3215 else
3216 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3217 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3218 fDefinedLabels = true;
3219
3220 /* Flush the dirty guest registers from the other branch. */
3221 while (fGstRegDirtyTail)
3222 {
3223 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3224 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3225 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3226 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3227 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3228
3229 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3230
3231 /* Mismatching shadowing should've been dropped in the previous step already. */
3232 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3233 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3234 }
3235
3236 /* Here is the actual endif label, fixup the above jump to land here. */
3237 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3238 }
3239#endif
3240
3241 /*
3242 * Check variables next. For now we must require them to be identical
3243 * or stuff we can recreate. (No code is emitted here.)
3244 */
3245 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3246#ifdef VBOX_STRICT
3247 uint32_t const offAssert = off;
3248#endif
3249 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3250 if (fVars)
3251 {
3252 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3253 do
3254 {
3255 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3256 fVars &= ~RT_BIT_32(idxVar);
3257
3258 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3259 {
3260 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3261 continue;
3262 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3263 {
3264 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3265 if (idxHstReg != UINT8_MAX)
3266 {
3267 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3268 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3269 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3270 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3271 }
3272 continue;
3273 }
3274 }
3275 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3276 continue;
3277
3278 /* Irreconcilable, so drop it. */
3279 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3280 if (idxHstReg != UINT8_MAX)
3281 {
3282 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3283 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3284 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3285 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3286 }
3287 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3288 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3289 } while (fVars);
3290 }
3291 Assert(off == offAssert);
3292
3293 /*
3294 * Finally, check that the host register allocations match.
3295 */
3296 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3297 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3298 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3299 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3300 }
3301 }
3302
3303 /*
3304 * Define the endif label and maybe the else one if we're still in the 'if' part.
3305 */
3306 if (!fDefinedLabels)
3307 {
3308 if (!pEntry->fInElse)
3309 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3310 else
3311 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3312 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3313 }
3314
3315 /* Pop the conditional stack. */
3316 pReNative->cCondDepth -= 1;
3317
3318 return off;
3319}
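/* Informal illustration: an IEM_MC_IF_XXX(...) / IEM_MC_ELSE() / IEM_MC_ENDIF()
 * sequence expands to the emitter calls above wrapped in do/while(0) blocks and
 * produces native code shaped roughly like:
 *          test/jcc -> Else            ; emitted by the IEM_MC_IF_XXX emitter
 *          <if-block code>
 *          jmp      -> Endif           ; emitted by iemNativeEmitElse
 *      Else:
 *          <else-block code>
 *      Endif:
 * with the register/variable state snapshotted at the IF and reconciled at the
 * ENDIF as described above. */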
3320
3321
3322/**
3323 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3324 *
3325 * The compiler should be able to figure this out at compile time, so sprinkling
3326 * constexpr wherever possible here to nudge it along.
3327 */
3328template<uint32_t const a_fEfl>
3329RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3330{
3331 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3332 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3333 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3334 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3335 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3336 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3337 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3338}
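/* Example (informal): iemNativeEflagsToLivenessMask<X86_EFL_ZF | X86_EFL_CF>() yields
   RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF), while
   any non-status bit in the input additionally sets the IEMLIVENESSBIT_IDX_EFL_OTHER
   bit. */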
3339
3340
3341/**
3342 * Helper function to convert a single X86_EFL_xxxx value to its bit number.
3343 *
3344 * The compiler should be able to figure this out at compile time, so sprinkling
3345 * constexpr wherever possible here to nudge it along.
3346 */
3347template<uint32_t const a_fEfl>
3348RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3349{
3350 AssertCompile( a_fEfl == X86_EFL_CF
3351 || a_fEfl == X86_EFL_PF
3352 || a_fEfl == X86_EFL_AF
3353 || a_fEfl == X86_EFL_ZF
3354 || a_fEfl == X86_EFL_SF
3355 || a_fEfl == X86_EFL_OF
3356 || a_fEfl == X86_EFL_DF);
3357 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3358 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3359 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3360 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3361 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3362 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3363 : X86_EFL_DF_BIT;
3364}
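/* Example (informal): iemNativeEflagsToSingleBitNo<X86_EFL_ZF>() evaluates to
   X86_EFL_ZF_BIT; passing anything but the seven flags listed in the AssertCompile
   above fails to compile. */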
3365
3366
3367#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3368 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3369 do {
3370
3371/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3372DECL_INLINE_THROW(uint32_t)
3373iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3374{
3375 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3376 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3377 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3378
3379 /* Get the eflags. */
3380 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3381
3382 /* Test and jump. */
3383 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3384
3385 /* Free but don't flush the EFlags register. */
3386 iemNativeRegFreeTmp(pReNative, idxEflReg);
3387
3388 /* Make a copy of the core state now as we start the if-block. */
3389 iemNativeCondStartIfBlock(pReNative, off);
3390
3391 return off;
3392}
3393
3394
3395#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3396 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3397 do {
3398
3399/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3400DECL_INLINE_THROW(uint32_t)
3401iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3402{
3403 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3404 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3405 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3406
3407 /* Get the eflags. */
3408 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3409
3410 /* Test and jump. */
3411 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3412
3413 /* Free but don't flush the EFlags register. */
3414 iemNativeRegFreeTmp(pReNative, idxEflReg);
3415
3416 /* Make a copy of the core state now as we start the if-block. */
3417 iemNativeCondStartIfBlock(pReNative, off);
3418
3419 return off;
3420}
3421
3422
3423#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3424 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3425 iemNativeEflagsToLivenessMask<a_fBit>()); \
3426 do {
3427
3428/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3429DECL_INLINE_THROW(uint32_t)
3430iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3431{
3432 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3433 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3434 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3435
3436 /* Get the eflags. */
3437 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3438
3439 /* Test and jump. */
3440 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3441
3442 /* Free but don't flush the EFlags register. */
3443 iemNativeRegFreeTmp(pReNative, idxEflReg);
3444
3445 /* Make a copy of the core state now as we start the if-block. */
3446 iemNativeCondStartIfBlock(pReNative, off);
3447
3448 return off;
3449}
3450
3451
3452#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3453 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3454 iemNativeEflagsToLivenessMask<a_fBit>()); \
3455 do {
3456
3457/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3458DECL_INLINE_THROW(uint32_t)
3459iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3460{
3461 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3462 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3463 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3464
3465 /* Get the eflags. */
3466 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3467
3468 /* Test and jump. */
3469 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3470
3471 /* Free but don't flush the EFlags register. */
3472 iemNativeRegFreeTmp(pReNative, idxEflReg);
3473
3474 /* Make a copy of the core state now as we start the if-block. */
3475 iemNativeCondStartIfBlock(pReNative, off);
3476
3477 return off;
3478}
3479
3480
3481#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3482 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3483 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3484 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3485 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3486 do {
3487
3488#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3489 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3490 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3491 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3492 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3493 do {
3494
3495/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3496DECL_INLINE_THROW(uint32_t)
3497iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3498 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3499{
3500 Assert(iBitNo1 != iBitNo2);
3501 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3502 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3503 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3504
3505 /* Get the eflags. */
3506 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3507
3508#ifdef RT_ARCH_AMD64
3509 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3510
3511 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3512 if (iBitNo1 > iBitNo2)
3513 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3514 else
3515 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3516 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3517
3518#elif defined(RT_ARCH_ARM64)
3519 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3520 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3521
3522 /* and tmpreg, eflreg, #1<<iBitNo1 */
3523 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3524
3525 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3526 if (iBitNo1 > iBitNo2)
3527 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3528 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3529 else
3530 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3531 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3532
3533 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3534
3535#else
3536# error "Port me"
3537#endif
3538
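    /* Worked example (informal): for IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF)
       we get iBitNo1=SF, iBitNo2=OF; tmpreg first holds EFLAGS.SF shifted up to the
       OF bit position, and after the XOR bit iBitNo2 of tmpreg equals SF ^ OF, i.e.
       it is set exactly when the two flags differ. */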
3539 /* Test (bit iBitNo2 is set in tmpreg if the two flag bits differ) and jump. */
3540 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3541 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3542
3543 /* Free but don't flush the EFlags and tmp registers. */
3544 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3545 iemNativeRegFreeTmp(pReNative, idxEflReg);
3546
3547 /* Make a copy of the core state now as we start the if-block. */
3548 iemNativeCondStartIfBlock(pReNative, off);
3549
3550 return off;
3551}
3552
3553
3554#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3555 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3556 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3557 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3558 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3559 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3560 do {
3561
3562#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3563 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3564 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3565 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3566 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3567 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3568 do {
3569
3570/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3571 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3572DECL_INLINE_THROW(uint32_t)
3573iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3574 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3575{
3576 Assert(iBitNo1 != iBitNo);
3577 Assert(iBitNo2 != iBitNo);
3578 Assert(iBitNo2 != iBitNo1);
3579 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3580 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3581 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3582
3583 /* We need an if-block label for the inverted variant. */
3584 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3585 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3586
3587 /* Get the eflags. */
3588 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3589
3590#ifdef RT_ARCH_AMD64
3591 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3592#elif defined(RT_ARCH_ARM64)
3593 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3594#endif
3595
3596 /* Check for the lone bit first. */
3597 if (!fInverted)
3598 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3599 else
3600 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3601
3602 /* Then extract and compare the other two bits. */
3603#ifdef RT_ARCH_AMD64
3604 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3605 if (iBitNo1 > iBitNo2)
3606 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3607 else
3608 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3609 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3610
3611#elif defined(RT_ARCH_ARM64)
3612 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3613
3614 /* and tmpreg, eflreg, #1<<iBitNo1 */
3615 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3616
3617 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3618 if (iBitNo1 > iBitNo2)
3619 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3620 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3621 else
3622 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3623 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3624
3625 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3626
3627#else
3628# error "Port me"
3629#endif
3630
3631 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3632 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3633 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3634
3635 /* Free but don't flush the EFlags and tmp registers. */
3636 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3637 iemNativeRegFreeTmp(pReNative, idxEflReg);
3638
3639 /* Make a copy of the core state now as we start the if-block. */
3640 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3641
3642 return off;
3643}
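
/*
 * For illustration only, the sequence emitted above is roughly equivalent to
 * the following (the ARM64 variant folds the shift into the EOR, but the
 * result is the same):
 *
 *      if (fEfl & RT_BIT_32(iBitNo)) goto <else-label, or if-label when inverted>;
 *      uTmp  = fEfl & RT_BIT_32(iBitNo1);
 *      uTmp  = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2) : uTmp << (iBitNo2 - iBitNo1);
 *      uTmp ^= fEfl;               // bit iBitNo2 now holds (bit iBitNo1 ^ bit iBitNo2)
 *      if (uTmp & RT_BIT_32(iBitNo2)) goto <else-label>;   // inverted: branch when the bit is clear instead
 */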
3644
3645
3646#define IEM_MC_IF_CX_IS_NZ() \
3647 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3648 do {
3649
3650/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3651DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3652{
3653 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3654
3655 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3656 kIemNativeGstRegUse_ReadOnly);
3657 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3658 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3659
3660 iemNativeCondStartIfBlock(pReNative, off);
3661 return off;
3662}
3663
3664
3665#define IEM_MC_IF_ECX_IS_NZ() \
3666 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3667 do {
3668
3669#define IEM_MC_IF_RCX_IS_NZ() \
3670 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3671 do {
3672
3673/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3674DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3675{
3676 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3677
3678 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3679 kIemNativeGstRegUse_ReadOnly);
3680 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3681 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3682
3683 iemNativeCondStartIfBlock(pReNative, off);
3684 return off;
3685}
3686
3687
3688#define IEM_MC_IF_CX_IS_NOT_ONE() \
3689 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3690 do {
3691
3692/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3693DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3694{
3695 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3696
3697 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3698 kIemNativeGstRegUse_ReadOnly);
3699#ifdef RT_ARCH_AMD64
3700 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3701#else
3702 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3703 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3704 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3705#endif
3706 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3707
3708 iemNativeCondStartIfBlock(pReNative, off);
3709 return off;
3710}
3711
3712
3713#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3714 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3715 do {
3716
3717#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3718 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3719 do {
3720
3721/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3722DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3723{
3724 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3725
3726 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3727 kIemNativeGstRegUse_ReadOnly);
3728 if (f64Bit)
3729 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3730 else
3731 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3732 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3733
3734 iemNativeCondStartIfBlock(pReNative, off);
3735 return off;
3736}
3737
3738
3739#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3740 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3741 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3742 iemNativeEflagsToLivenessMask<a_fBit>()); \
3743 do {
3744
3745#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3746 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3747 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3748 iemNativeEflagsToLivenessMask<a_fBit>()); \
3749 do {
3750
3751/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3752 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3753DECL_INLINE_THROW(uint32_t)
3754iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3755 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3756{
3757 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3758 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3759 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3760
3761 /* We have to load both RCX and EFLAGS before we can start branching,
3762 otherwise we'll end up in the else-block with an inconsistent
3763 register allocator state.
3764 Doing EFLAGS first as it's more likely to be loaded, right? */
3765 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3766 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3767 kIemNativeGstRegUse_ReadOnly);
3768
3769 /** @todo we could reduce this to a single branch instruction by spending a
3770 * temporary register and some setnz stuff. Not sure if loops are
3771 * worth it. */
3772 /* Check CX. */
3773#ifdef RT_ARCH_AMD64
3774 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3775#else
3776 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3777 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3778 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3779#endif
3780
3781 /* Check the EFlags bit. */
3782 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3783 !fCheckIfSet /*fJmpIfSet*/);
3784
3785 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3786 iemNativeRegFreeTmp(pReNative, idxEflReg);
3787
3788 iemNativeCondStartIfBlock(pReNative, off);
3789 return off;
3790}
3791
3792
3793#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3794 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3795 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3796 iemNativeEflagsToLivenessMask<a_fBit>()); \
3797 do {
3798
3799#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3800 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3801 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3802 iemNativeEflagsToLivenessMask<a_fBit>()); \
3803 do {
3804
3805#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3806 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3807 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3808 iemNativeEflagsToLivenessMask<a_fBit>()); \
3809 do {
3810
3811#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3812 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3813 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3814 iemNativeEflagsToLivenessMask<a_fBit>()); \
3815 do {
3816
3817/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3818 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3819 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3820 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3821DECL_INLINE_THROW(uint32_t)
3822iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3823 unsigned iBitNo, uint64_t fLivenessEFlBit)
3824
3825{
3826 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3827 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3828 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3829
3830 /* We have to load both RCX and EFLAGS before we can start branching,
3831 otherwise we'll end up in the else-block with an inconsistent
3832 register allocator state.
3833 Doing EFLAGS first as it's more likely to be loaded, right? */
3834 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3835 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3836 kIemNativeGstRegUse_ReadOnly);
3837
3838 /** @todo we could reduce this to a single branch instruction by spending a
3839 * temporary register and some setnz stuff. Not sure if loops are
3840 * worth it. */
3841 /* Check RCX/ECX. */
3842 if (f64Bit)
3843 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3844 else
3845 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3846
3847 /* Check the EFlags bit. */
3848 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3849 !fCheckIfSet /*fJmpIfSet*/);
3850
3851 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3852 iemNativeRegFreeTmp(pReNative, idxEflReg);
3853
3854 iemNativeCondStartIfBlock(pReNative, off);
3855 return off;
3856}
3857
3858
3859#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3860 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3861 do {
3862
3863/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3864DECL_INLINE_THROW(uint32_t)
3865iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3866{
3867 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3868
3869 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3870 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3871 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3872 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3873
3874 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3875
3876 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3877
3878 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3879
3880 iemNativeCondStartIfBlock(pReNative, off);
3881 return off;
3882}
3883
3884
3885#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3886 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3887 do {
3888
3889/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3890DECL_INLINE_THROW(uint32_t)
3891iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3892{
3893 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3894 Assert(iGReg < 16);
3895
3896 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3897 kIemNativeGstRegUse_ReadOnly);
3898
3899 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3900
3901 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3902
3903 iemNativeCondStartIfBlock(pReNative, off);
3904 return off;
3905}
3906
3907
3908
3909/*********************************************************************************************************************************
3910* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3911*********************************************************************************************************************************/
3912
3913#define IEM_MC_NOREF(a_Name) \
3914 RT_NOREF_PV(a_Name)
3915
3916#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3917 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3918
3919#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3920 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3921
3922#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3923 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3924
3925#define IEM_MC_LOCAL(a_Type, a_Name) \
3926 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3927
3928#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3929 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3930
3931#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3932 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
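
/*
 * Note that these "variables" are (packed) indices into pReNative->Core.aVars
 * rather than C variables holding the actual value, so for instance
 *      IEM_MC_LOCAL(uint16_t, u16Tmp);
 * expands to
 *      uint8_t const u16Tmp = iemNativeVarAlloc(pReNative, sizeof(uint16_t));
 * and u16Tmp is subsequently passed around as a variable index (see
 * IEMNATIVE_VAR_IDX_UNPACK).
 */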
3933
3934
3935/**
3936 * Sets the host register for @a idxVarRc to @a idxReg.
3937 *
3938 * Any guest register shadowing will be implicitly dropped by this call.
3939 *
3940 * The variable must not have any register associated with it (causes
3941 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3942 * implied.
3943 *
3944 * @returns idxReg
3945 * @param pReNative The recompiler state.
3946 * @param idxVar The variable.
3947 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3948 * @param off For recording in debug info.
3949 * @param fAllocated Set if the register is already allocated, false if not.
3950 *
3951 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3952 */
3953DECL_INLINE_THROW(uint8_t)
3954iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3955{
3956 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3957 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3958 Assert(!pVar->fRegAcquired);
3959 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3960 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3961 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3962 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3963
3964 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3965 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3966
3967 iemNativeVarSetKindToStack(pReNative, idxVar);
3968 pVar->idxReg = idxReg;
3969
3970 return idxReg;
3971}
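
/*
 * Typical use of the above: binding the helper call return register to a
 * result variable right after emitting the call, as done by
 * iemNativeEmitCallAImplCommon() further down:
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false);
 */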
3972
3973
3974/**
3975 * Convenience wrapper around iemNativeVarRegisterSet that also marks the register as acquired.
3976 */
3977DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3978 uint8_t idxReg, uint32_t *poff)
3979{
3980 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3981 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3982 return idxReg;
3983}
3984
3985
3986/**
3987 * This is called by IEM_MC_END() to clean up all variables.
3988 */
3989DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3990{
3991 uint32_t const bmVars = pReNative->Core.bmVars;
3992 if (bmVars != 0)
3993 iemNativeVarFreeAllSlow(pReNative, bmVars);
3994 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3995 Assert(pReNative->Core.bmStack == 0);
3996}
3997
3998
3999#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4000
4001/**
4002 * This is called by IEM_MC_FREE_LOCAL.
4003 */
4004DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4005{
4006 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4007 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4008 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4009}
4010
4011
4012#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4013
4014/**
4015 * This is called by IEM_MC_FREE_ARG.
4016 */
4017DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4018{
4019 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4020 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4021 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4022}
4023
4024
4025#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4026
4027/**
4028 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4029 */
4030DECL_INLINE_THROW(uint32_t)
4031iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4032{
4033 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4034 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4035 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4036 Assert( pVarDst->cbVar == sizeof(uint16_t)
4037 || pVarDst->cbVar == sizeof(uint32_t));
4038
4039 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4040 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4041 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4042 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4043 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4044
4045 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4046
4047 /*
4048 * Special case for immediates.
4049 */
4050 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4051 {
4052 switch (pVarDst->cbVar)
4053 {
4054 case sizeof(uint16_t):
4055 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4056 break;
4057 case sizeof(uint32_t):
4058 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4059 break;
4060 default: AssertFailed(); break;
4061 }
4062 }
4063 else
4064 {
4065 /*
4066 * The generic solution for now.
4067 */
4068 /** @todo optimize this by having the python script make sure the source
4069 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4070 * statement. Then we could just transfer the register assignments. */
4071 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4072 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4073 switch (pVarDst->cbVar)
4074 {
4075 case sizeof(uint16_t):
4076 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4077 break;
4078 case sizeof(uint32_t):
4079 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4080 break;
4081 default: AssertFailed(); break;
4082 }
4083 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4084 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4085 }
4086 return off;
4087}
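
/*
 * Illustration only: for a stack-kind source the generic path above acquires
 * host registers for both variables and emits a single narrowing gpr-to-gpr
 * copy (16- or 32-bit wide, matching the destination size), whereas an
 * immediate source emits no code at all and simply turns the destination
 * into a truncated constant.
 */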
4088
4089
4090
4091/*********************************************************************************************************************************
4092* Emitters for IEM_MC_CALL_CIMPL_XXX *
4093*********************************************************************************************************************************/
4094
4095/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4096DECL_INLINE_THROW(uint32_t)
4097iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4098 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4099
4100{
4101 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4102 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4103
4104 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4105 when a call clobbers any of the relevant control registers. */
4106#if 1
4107 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4108 {
4109 /* Likely as long as call+ret are done via cimpl. */
4110 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4111 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4112 }
4113 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4114 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4115 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4116 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4117 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4118 else
4119 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4120 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4121 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4122
4123#else
4124 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4125 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4126 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4127 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4128 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4129 || pfnCImpl == (uintptr_t)iemCImpl_callf
4130 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4131 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4132 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4133 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4134 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4135#endif
4136
4137#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4138 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4139 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4140 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4141#endif
4142
4143 /*
4144 * Do all the call setup and cleanup.
4145 */
4146 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4147
4148 /*
4149 * Load the two or three hidden arguments.
4150 */
4151#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4152 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
4153 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4154 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4155#else
4156 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4157 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4158#endif
4159
4160 /*
4161 * Make the call and check the return code.
4162 *
4163 * Shadow PC copies are always flushed here, other stuff depends on flags.
4164 * Segment and general purpose registers are explicitly flushed via the
4165 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4166 * macros.
4167 */
4168 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4169#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4170 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
4171#endif
4172 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4173 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4174 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4175 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4176
4177#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4178 pReNative->Core.fDebugPcInitialized = false;
4179 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4180#endif
4181
4182 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4183}
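
/*
 * For illustration, the common (non VBOXSTRICTRC-by-reference) path above
 * emits something along these lines:
 *      <iemNativeEmitCallCommon: load the IEM_MC_ARG values into the call
 *       argument registers following the two hidden arguments>
 *      arg0 = pVCpu                ; IEMNATIVE_REG_FIXED_PVMCPU
 *      arg1 = cbInstr
 *      call pfnCImpl
 *      <flush guest shadow copies, always including the PC and, unless
 *       IEM_MC_F_WITHOUT_FLAGS, EFLAGS>
 *      <check the VBOXSTRICTRC return value and pass failures up>
 */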
4184
4185
4186#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4187 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4188
4189/** Emits code for IEM_MC_CALL_CIMPL_1. */
4190DECL_INLINE_THROW(uint32_t)
4191iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4192 uintptr_t pfnCImpl, uint8_t idxArg0)
4193{
4194 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4195 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4196}
4197
4198
4199#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4200 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4201
4202/** Emits code for IEM_MC_CALL_CIMPL_2. */
4203DECL_INLINE_THROW(uint32_t)
4204iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4205 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4206{
4207 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4208 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4209 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4210}
4211
4212
4213#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4214 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4215 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4216
4217/** Emits code for IEM_MC_CALL_CIMPL_3. */
4218DECL_INLINE_THROW(uint32_t)
4219iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4220 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4221{
4222 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4223 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4224 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4225 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4226}
4227
4228
4229#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4230 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4231 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4232
4233/** Emits code for IEM_MC_CALL_CIMPL_4. */
4234DECL_INLINE_THROW(uint32_t)
4235iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4236 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4237{
4238 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4239 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4240 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4241 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4242 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4243}
4244
4245
4246#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4247 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4248 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4249
4250/** Emits code for IEM_MC_CALL_CIMPL_5. */
4251DECL_INLINE_THROW(uint32_t)
4252iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4253 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4254{
4255 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4256 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4257 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4258 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4260 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4261}
4262
4263
4264/** Recompiler debugging: Flush guest register shadow copies. */
4265#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4266
4267
4268
4269/*********************************************************************************************************************************
4270* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4271*********************************************************************************************************************************/
4272
4273/**
4274 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4275 */
4276DECL_INLINE_THROW(uint32_t)
4277iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4278 uintptr_t pfnAImpl, uint8_t cArgs)
4279{
4280 if (idxVarRc != UINT8_MAX)
4281 {
4282 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4283 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4284 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4285 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4286 }
4287
4288 /*
4289 * Do all the call setup and cleanup.
4290 *
4291 * Only pending guest register writes held in call volatile registers need flushing here, as
4292 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4293 * access their parameters. The flushing of call volatile registers is always done by
4294 * iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
4295 */
4296 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4297
4298 /*
4299 * Make the call and update the return code variable if we've got one.
4300 */
4301 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4302 if (idxVarRc != UINT8_MAX)
4303 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4304
4305 return off;
4306}
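
/*
 * Unlike the CIMPL path above there are no hidden arguments here and no
 * status code checking: a non-void helper simply gets IEMNATIVE_CALL_RET_GREG
 * bound to the result variable via iemNativeVarRegisterSet().
 */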
4307
4308
4309
4310#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4311 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4312
4313#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4314 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4315
4316/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4317DECL_INLINE_THROW(uint32_t)
4318iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4319{
4320 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4321}
4322
4323
4324#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4325 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4326
4327#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4328 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4329
4330/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4331DECL_INLINE_THROW(uint32_t)
4332iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4333{
4334 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4335 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4336}
4337
4338
4339#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4340 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4341
4342#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4343 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4344
4345/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4346DECL_INLINE_THROW(uint32_t)
4347iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4348 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4349{
4350 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4352 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4353}
4354
4355
4356#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4357 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4358
4359#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4360 IEM_MC_LOCAL(a_rcType, a_rc); \
4361 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4362
4363/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4364DECL_INLINE_THROW(uint32_t)
4365iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4366 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4367{
4368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4369 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4370 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4371 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4372}
4373
4374
4375#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4376 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4377
4378#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4379 IEM_MC_LOCAL(a_rcType, a_rc); \
4380 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4381
4382/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4383DECL_INLINE_THROW(uint32_t)
4384iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4385 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4386{
4387 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4388 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4389 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4390 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4391 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4392}
4393
4394
4395
4396/*********************************************************************************************************************************
4397* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4398*********************************************************************************************************************************/
4399
4400#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4401 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4402
4403#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4404 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4405
4406#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4407 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4408
4409#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4410 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4411
4412
4413/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4414 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4415DECL_INLINE_THROW(uint32_t)
4416iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4417{
4418 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4419 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4420 Assert(iGRegEx < 20);
4421
4422 /* Same discussion as in iemNativeEmitFetchGregU16 */
4423 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4424 kIemNativeGstRegUse_ReadOnly);
4425
4426 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4427 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4428
4429 /* The value is zero-extended to the full 64-bit host register width. */
4430 if (iGRegEx < 16)
4431 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4432 else
4433 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4434
4435 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4436 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4437 return off;
4438}
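
/*
 * Note: iGRegEx values 0..15 select the low byte of the corresponding GPR,
 * while 16..19 select the high byte (AH, CH, DH, BH) of the first four GPRs,
 * hence the 'iGRegEx & 15' register lookup and the Gpr8Hi load above.
 */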
4439
4440
4441#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4442 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4443
4444#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4445 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4446
4447#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4448 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4449
4450/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4451DECL_INLINE_THROW(uint32_t)
4452iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4453{
4454 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4455 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4456 Assert(iGRegEx < 20);
4457
4458 /* Same discussion as in iemNativeEmitFetchGregU16 */
4459 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4460 kIemNativeGstRegUse_ReadOnly);
4461
4462 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4463 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4464
4465 if (iGRegEx < 16)
4466 {
4467 switch (cbSignExtended)
4468 {
4469 case sizeof(uint16_t):
4470 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4471 break;
4472 case sizeof(uint32_t):
4473 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4474 break;
4475 case sizeof(uint64_t):
4476 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4477 break;
4478 default: AssertFailed(); break;
4479 }
4480 }
4481 else
4482 {
4483 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4484 switch (cbSignExtended)
4485 {
4486 case sizeof(uint16_t):
4487 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4488 break;
4489 case sizeof(uint32_t):
4490 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4491 break;
4492 case sizeof(uint64_t):
4493 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4494 break;
4495 default: AssertFailed(); break;
4496 }
4497 }
4498
4499 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4500 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4501 return off;
4502}
4503
4504
4505
4506#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4507 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4508
4509#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4510 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4511
4512#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4513 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4514
4515/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4516DECL_INLINE_THROW(uint32_t)
4517iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4518{
4519 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4520 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4521 Assert(iGReg < 16);
4522
4523 /*
4524 * We can either just load the low 16-bit of the GPR into a host register
4525 * for the variable, or we can do so via a shadow copy host register. The
4526 * latter will avoid having to reload it if it's being stored later, but
4527 * will waste a host register if it isn't touched again. Since we don't
4528 * know what's going to happen, we choose the latter for now.
4529 */
4530 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4531 kIemNativeGstRegUse_ReadOnly);
4532
4533 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4534 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4535 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4536 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4537
4538 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4539 return off;
4540}
4541
4542#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4543 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4544
4545/** Emits code for IEM_MC_FETCH_GREG_I16. */
4546DECL_INLINE_THROW(uint32_t)
4547iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4548{
4549 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4550 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4551 Assert(iGReg < 16);
4552
4553 /*
4554 * We can either just load the low 16-bit of the GPR into a host register
4555 * for the variable, or we can do so via a shadow copy host register. The
4556 * latter will avoid having to reload it if it's being stored later, but
4557 * will waste a host register if it isn't touched again. Since we don't
4558 * know what's going to happen, we choose the latter for now.
4559 */
4560 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4561 kIemNativeGstRegUse_ReadOnly);
4562
4563 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4564 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4565#ifdef RT_ARCH_AMD64
4566 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4567#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM64; we use a 32-bit register instead, which requires sign extension here. */
4568 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4569#endif
4570 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4571
4572 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4573 return off;
4574}
4575
4576
4577#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4578 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4579
4580#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4581 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4582
4583/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4584DECL_INLINE_THROW(uint32_t)
4585iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4586{
4587 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4588 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4589 Assert(iGReg < 16);
4590
4591 /*
4592 * We can either just load the low 16-bit of the GPR into a host register
4593 * for the variable, or we can do so via a shadow copy host register. The
4594 * latter will avoid having to reload it if it's being stored later, but
4595 * will waste a host register if it isn't touched again. Since we don't
4596 * know what's going to happen, we choose the latter for now.
4597 */
4598 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4599 kIemNativeGstRegUse_ReadOnly);
4600
4601 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4602 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4603 if (cbSignExtended == sizeof(uint32_t))
4604 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4605 else
4606 {
4607 Assert(cbSignExtended == sizeof(uint64_t));
4608 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4609 }
4610 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4611
4612 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4613 return off;
4614}
4615
4616
4617#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4618 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4619
4620#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4621 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4622
4623#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4624 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4625
4626/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4629{
4630 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4631 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4632 Assert(iGReg < 16);
4633
4634 /*
4635 * We can either just load the low 32-bit of the GPR into a host register
4636 * for the variable, or we can do so via a shadow copy host register. The
4637 * latter will avoid having to reload it if it's being stored later, but
4638 * will waste a host register if it isn't touched again. Since we don't
4639 * know what's going to happen, we choose the latter for now.
4640 */
4641 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4642 kIemNativeGstRegUse_ReadOnly);
4643
4644 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4646 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4647 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4648
4649 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4650 return off;
4651}
4652
4653
4654#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4655 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4656
4657/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4658DECL_INLINE_THROW(uint32_t)
4659iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4660{
4661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4663 Assert(iGReg < 16);
4664
4665 /*
4666 * We can either just load the low 32-bit of the GPR into a host register
4667 * for the variable, or we can do so via a shadow copy host register. The
4668 * latter will avoid having to reload it if it's being stored later, but
4669 * will waste a host register if it isn't touched again. Since we don't
4670 * know what's going to happen, we choose the latter for now.
4671 */
4672 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4673 kIemNativeGstRegUse_ReadOnly);
4674
4675 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4676 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4677 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4678 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4679
4680 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4681 return off;
4682}
4683
4684
4685#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4686 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4687
4688#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4689 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4690
4691/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4692 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4693DECL_INLINE_THROW(uint32_t)
4694iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4695{
4696 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4697 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4698 Assert(iGReg < 16);
4699
4700 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4701 kIemNativeGstRegUse_ReadOnly);
4702
4703 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4704 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4705 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4706 /** @todo name the register a shadow one already? */
4707 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4708
4709 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4710 return off;
4711}
4712
4713
4714#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4715 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4716
4717/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4718DECL_INLINE_THROW(uint32_t)
4719iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4720{
4721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4723 Assert(iGRegLo < 16 && iGRegHi < 16);
4724
4725 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4726 kIemNativeGstRegUse_ReadOnly);
4727 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4728 kIemNativeGstRegUse_ReadOnly);
4729
4730 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4731 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4732 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4733 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4734
4735 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4736 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4737 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4738 return off;
4739}
4740
4741
4742/*********************************************************************************************************************************
4743* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4744*********************************************************************************************************************************/
4745
4746#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4747 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4748
4749/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4750DECL_INLINE_THROW(uint32_t)
4751iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4752{
4753 Assert(iGRegEx < 20);
4754 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4755 kIemNativeGstRegUse_ForUpdate);
4756#ifdef RT_ARCH_AMD64
4757 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4758
4759 /* To the lowest byte of the register: mov r8, imm8 */
4760 if (iGRegEx < 16)
4761 {
4762 if (idxGstTmpReg >= 8)
4763 pbCodeBuf[off++] = X86_OP_REX_B;
4764 else if (idxGstTmpReg >= 4)
4765 pbCodeBuf[off++] = X86_OP_REX;
4766 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4767 pbCodeBuf[off++] = u8Value;
4768 }
4769 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; if not, we rotate. */
4770 else if (idxGstTmpReg < 4)
4771 {
4772 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4773 pbCodeBuf[off++] = u8Value;
4774 }
4775 else
4776 {
4777 /* ror reg64, 8 */
4778 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4779 pbCodeBuf[off++] = 0xc1;
4780 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4781 pbCodeBuf[off++] = 8;
4782
4783 /* mov reg8, imm8 */
4784 if (idxGstTmpReg >= 8)
4785 pbCodeBuf[off++] = X86_OP_REX_B;
4786 else if (idxGstTmpReg >= 4)
4787 pbCodeBuf[off++] = X86_OP_REX;
4788 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4789 pbCodeBuf[off++] = u8Value;
4790
4791 /* rol reg64, 8 */
4792 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4793 pbCodeBuf[off++] = 0xc1;
4794 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4795 pbCodeBuf[off++] = 8;
4796 }
4797
4798#elif defined(RT_ARCH_ARM64)
4799 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4800 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4801 if (iGRegEx < 16)
4802 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4803 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4804 else
4805 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4806 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4807 iemNativeRegFreeTmp(pReNative, idxImmReg);
4808
4809#else
4810# error "Port me!"
4811#endif
4812
4813 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4814
4815#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4816 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4817#endif
4818
4819 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4820 return off;
4821}
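
/*
 * Why the ror/mov/rol dance on AMD64: the AH/CH/DH/BH byte encodings only
 * exist for the first four registers and cannot be combined with a REX
 * prefix, so when the byte we want cannot be addressed directly in the host
 * register holding the guest GPR, we rotate the 64-bit register by 8, write
 * the now-lowest byte, and rotate back:
 *      ror  reg64, 8
 *      mov  reg8, imm8     ; or mov reg8, reg8 in the variable variant below
 *      rol  reg64, 8
 */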
4822
4823
4824#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4825 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4826
4827/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4828DECL_INLINE_THROW(uint32_t)
4829iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4830{
4831 Assert(iGRegEx < 20);
4832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4833
4834 /*
4835 * If it's a constant value (unlikely) we treat this as an
4836 * IEM_MC_STORE_GREG_U8_CONST statement.
4837 */
4838 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4839 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4840 { /* likely */ }
4841 else
4842 {
4843 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4844 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4845 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4846 }
4847
4848 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4849 kIemNativeGstRegUse_ForUpdate);
4850 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
4851
4852#ifdef RT_ARCH_AMD64
4853 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4854 if (iGRegEx < 16)
4855 {
4856 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4857 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4858 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4859 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4860 pbCodeBuf[off++] = X86_OP_REX;
4861 pbCodeBuf[off++] = 0x8a;
4862 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4863 }
4864 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4865 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4866 {
4867 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4868 pbCodeBuf[off++] = 0x8a;
4869 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4870 }
4871 else
4872 {
4873 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4874
4875 /* ror reg64, 8 */
4876 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4877 pbCodeBuf[off++] = 0xc1;
4878 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4879 pbCodeBuf[off++] = 8;
4880
4881 /* mov reg8, reg8(r/m) */
4882 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4883 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4884 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4885 pbCodeBuf[off++] = X86_OP_REX;
4886 pbCodeBuf[off++] = 0x8a;
4887 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4888
4889 /* rol reg64, 8 */
4890 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4891 pbCodeBuf[off++] = 0xc1;
4892 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4893 pbCodeBuf[off++] = 8;
4894 }
4895
4896#elif defined(RT_ARCH_ARM64)
4897 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4898 or
4899 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4900 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4901 if (iGRegEx < 16)
4902 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4903 else
4904 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4905
4906#else
4907# error "Port me!"
4908#endif
4909 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4910
4911 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4912
4913#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4914 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4915#endif
4916 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4917 return off;
4918}
4919
4920
4921
4922#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4923 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4924
4925/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4926DECL_INLINE_THROW(uint32_t)
4927iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4928{
4929 Assert(iGReg < 16);
4930 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4931 kIemNativeGstRegUse_ForUpdate);
4932#ifdef RT_ARCH_AMD64
4933 /* mov reg16, imm16 */
4934 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4935 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4936 if (idxGstTmpReg >= 8)
4937 pbCodeBuf[off++] = X86_OP_REX_B;
4938 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4939 pbCodeBuf[off++] = RT_BYTE1(uValue);
4940 pbCodeBuf[off++] = RT_BYTE2(uValue);
4941
4942#elif defined(RT_ARCH_ARM64)
4943 /* movk xdst, #uValue, lsl #0 */
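    /* Note: movk with lsl #0 only replaces bits 15:0 and leaves bits 63:16 of
       the register untouched, which matches the x86 semantics of writing a
       16-bit GPR. */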
4944 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4945 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4946
4947#else
4948# error "Port me!"
4949#endif
4950
4951 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4952
4953#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4954 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4955#endif
4956 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4957 return off;
4958}
4959
4960
4961#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4962 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4963
4964/** Emits code for IEM_MC_STORE_GREG_U16. */
4965DECL_INLINE_THROW(uint32_t)
4966iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4967{
4968 Assert(iGReg < 16);
4969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4970
4971 /*
4972 * If it's a constant value (unlikely) we treat this as an
4973 * IEM_MC_STORE_GREG_U16_CONST statement.
4974 */
4975 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4976 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4977 { /* likely */ }
4978 else
4979 {
4980 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4981 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4982 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4983 }
4984
4985 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4986 kIemNativeGstRegUse_ForUpdate);
4987
4988#ifdef RT_ARCH_AMD64
4989 /* mov reg16, reg16 or [mem16] */
4990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4991 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4992 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4993 {
4994 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4995 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4996 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4997 pbCodeBuf[off++] = 0x8b;
4998 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4999 }
5000 else
5001 {
5002 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5003 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5004 if (idxGstTmpReg >= 8)
5005 pbCodeBuf[off++] = X86_OP_REX_R;
5006 pbCodeBuf[off++] = 0x8b;
5007 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5008 }
5009
5010#elif defined(RT_ARCH_ARM64)
5011 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5012 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
5013 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5014 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5015 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5016
5017#else
5018# error "Port me!"
5019#endif
5020
5021 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5022
5023#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5024 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5025#endif
5026 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5027 return off;
5028}
5029
5030
5031#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5032 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5033
5034/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5035DECL_INLINE_THROW(uint32_t)
5036iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5037{
5038 Assert(iGReg < 16);
5039 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5040 kIemNativeGstRegUse_ForFullWrite);
5041 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
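    /* uValue is only 32 bits wide, so the 64-bit immediate load leaves bits
       63:32 zero, giving the x86 behaviour of a 32-bit write zero-extending
       into the full 64-bit register. */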
5042#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5043 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5044#endif
5045 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5046 return off;
5047}
5048
5049
5050#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5051 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5052
5053#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5054 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5055
5056/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5057DECL_INLINE_THROW(uint32_t)
5058iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5059{
5060 Assert(iGReg < 16);
5061 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5062
5063 /*
5064 * If it's a constant value (unlikely) we treat this as an
5065 * IEM_MC_STORE_GREG_U32_CONST statement.
5066 */
5067 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5068 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5069 { /* likely */ }
5070 else
5071 {
5072 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5073 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5074 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5075 }
5076
5077 /*
5078 * For the rest we allocate a guest register for the variable and write
5079 * it to the CPUMCTX structure.
5080 */
5081 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5082#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5083 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5084#else
5085 RT_NOREF(idxVarReg);
5086#endif
5087#ifdef VBOX_STRICT
5088 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5089#endif
5090 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5091 return off;
5092}
5093
5094
5095#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5096 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5097
5098/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5099DECL_INLINE_THROW(uint32_t)
5100iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5101{
5102 Assert(iGReg < 16);
5103 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5104 kIemNativeGstRegUse_ForFullWrite);
5105 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5106#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5107 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5108#endif
5109 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5110 return off;
5111}
5112
5113
5114#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5115 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5116
5117#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5118 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5119
5120/** Emits code for IEM_MC_STORE_GREG_U64. */
5121DECL_INLINE_THROW(uint32_t)
5122iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5123{
5124 Assert(iGReg < 16);
5125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5126
5127 /*
5128 * If it's a constant value (unlikely) we treat this as an
5129 * IEM_MC_STORE_GREG_U64_CONST statement.
5130 */
5131 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5132 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5133 { /* likely */ }
5134 else
5135 {
5136 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5137 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5138 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5139 }
5140
5141 /*
5142 * For the rest we allocate a guest register for the variable and write
5143 * it to the CPUMCTX structure.
5144 */
5145 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5146#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5147 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5148#else
5149 RT_NOREF(idxVarReg);
5150#endif
5151 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5152 return off;
5153}
5154
5155
5156#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5157 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5158
5159/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5160DECL_INLINE_THROW(uint32_t)
5161iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5162{
5163 Assert(iGReg < 16);
5164 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5165 kIemNativeGstRegUse_ForUpdate);
5166 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5167#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5168 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5169#endif
5170 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5171 return off;
5172}
5173
5174
5175#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5176 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5177
5178/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5179DECL_INLINE_THROW(uint32_t)
5180iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5181{
5182 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5183 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5184 Assert(iGRegLo < 16 && iGRegHi < 16);
5185
5186 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5187 kIemNativeGstRegUse_ForFullWrite);
5188 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5189 kIemNativeGstRegUse_ForFullWrite);
5190
5191 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5192 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5193 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5194 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
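    /* The 128-bit variable is split back into two 64-bit GPR writes; which
       register pair receives the halves (e.g. RDX:RAX for an instruction that
       writes such a pair) is entirely up to the caller via iGRegLo/iGRegHi. */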
5195
5196 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5197 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5198 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5199 return off;
5200}
5201
5202
5203/*********************************************************************************************************************************
5204* General purpose register manipulation (add, sub). *
5205*********************************************************************************************************************************/
5206
5207#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5208 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5209
5210/** Emits code for IEM_MC_ADD_GREG_U16. */
5211DECL_INLINE_THROW(uint32_t)
5212iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5213{
5214 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5215 kIemNativeGstRegUse_ForUpdate);
5216
5217#ifdef RT_ARCH_AMD64
5218 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5219 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5220 if (idxGstTmpReg >= 8)
5221 pbCodeBuf[off++] = X86_OP_REX_B;
5222 if (uAddend == 1)
5223 {
5224 pbCodeBuf[off++] = 0xff; /* inc */
5225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5226 }
5227 else
5228 {
5229 pbCodeBuf[off++] = 0x81;
5230 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5231 pbCodeBuf[off++] = uAddend;
5232 pbCodeBuf[off++] = 0;
5233 }
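    /* A worked example (assuming, for illustration, that the guest register is
       cached in host rcx): adding 2 emits 66 81 c1 02 00, i.e. add cx, 2 with a
       16-bit immediate, while an addend of 1 takes the shorter inc path above. */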
5234
5235#else
5236 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5237 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5238
5239 /* add tmp, gstgrp, uAddend */
5240 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5241
5242 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5243 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5244
5245 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5246#endif
5247
5248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5249
5250#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5251 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5252#endif
5253
5254 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5255 return off;
5256}
5257
5258
5259#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5260 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5261
5262#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5263 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5264
5265/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5266DECL_INLINE_THROW(uint32_t)
5267iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5268{
5269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5270 kIemNativeGstRegUse_ForUpdate);
5271
5272#ifdef RT_ARCH_AMD64
5273 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5274 if (f64Bit)
5275 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5276 else if (idxGstTmpReg >= 8)
5277 pbCodeBuf[off++] = X86_OP_REX_B;
5278 if (uAddend == 1)
5279 {
5280 pbCodeBuf[off++] = 0xff; /* inc */
5281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5282 }
5283 else if (uAddend < 128)
5284 {
5285 pbCodeBuf[off++] = 0x83; /* add */
5286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5287 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5288 }
5289 else
5290 {
5291 pbCodeBuf[off++] = 0x81; /* add */
5292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5293 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5294 pbCodeBuf[off++] = 0;
5295 pbCodeBuf[off++] = 0;
5296 pbCodeBuf[off++] = 0;
5297 }
5298
5299#else
5300 /* add gstgrp, gstgrp, uAddend */
5301 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5302 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5303
5304#endif
5305
5306 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5307
5308#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5309 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5310#endif
5311
5312 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5313 return off;
5314}
5315
5316
5317
5318#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5319 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5320
5321/** Emits code for IEM_MC_SUB_GREG_U16. */
5322DECL_INLINE_THROW(uint32_t)
5323iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5324{
5325 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5326 kIemNativeGstRegUse_ForUpdate);
5327
5328#ifdef RT_ARCH_AMD64
5329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5330 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5331 if (idxGstTmpReg >= 8)
5332 pbCodeBuf[off++] = X86_OP_REX_B;
5333 if (uSubtrahend == 1)
5334 {
5335 pbCodeBuf[off++] = 0xff; /* dec */
5336 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5337 }
5338 else
5339 {
5340 pbCodeBuf[off++] = 0x81;
5341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5342 pbCodeBuf[off++] = uSubtrahend;
5343 pbCodeBuf[off++] = 0;
5344 }
5345
5346#else
5347 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5348 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5349
5350 /* sub tmp, gstgrp, uSubtrahend */
5351 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5352
5353 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5354 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5355
5356 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5357#endif
5358
5359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5360
5361#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5362 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5363#endif
5364
5365 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5366 return off;
5367}
5368
5369
5370#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5371 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5372
5373#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5374 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5375
5376/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5377DECL_INLINE_THROW(uint32_t)
5378iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5379{
5380 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5381 kIemNativeGstRegUse_ForUpdate);
5382
5383#ifdef RT_ARCH_AMD64
5384 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5385 if (f64Bit)
5386 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5387 else if (idxGstTmpReg >= 8)
5388 pbCodeBuf[off++] = X86_OP_REX_B;
5389 if (uSubtrahend == 1)
5390 {
5391 pbCodeBuf[off++] = 0xff; /* dec */
5392 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5393 }
5394 else if (uSubtrahend < 128)
5395 {
5396 pbCodeBuf[off++] = 0x83; /* sub */
5397 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5398 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5399 }
5400 else
5401 {
5402 pbCodeBuf[off++] = 0x81; /* sub */
5403 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5404 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5405 pbCodeBuf[off++] = 0;
5406 pbCodeBuf[off++] = 0;
5407 pbCodeBuf[off++] = 0;
5408 }
5409
5410#else
5411 /* sub gstgrp, gstgrp, uSubtrahend */
5412 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5413 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5414
5415#endif
5416
5417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5418
5419#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5420 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5421#endif
5422
5423 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5424 return off;
5425}
5426
5427
5428#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5429 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5430
5431#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5432 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5433
5434#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5435 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5436
5437#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5438 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5439
5440/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5441DECL_INLINE_THROW(uint32_t)
5442iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5443{
5444#ifdef VBOX_STRICT
5445 switch (cbMask)
5446 {
5447 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5448 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5449 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5450 case sizeof(uint64_t): break;
5451 default: AssertFailedBreak();
5452 }
5453#endif
5454
5455 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5456 kIemNativeGstRegUse_ForUpdate);
5457
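    /* The sub-register cases below widen the mask with all-ones upper bits so
       a full 64-bit AND only clears bits inside the addressed sub-register;
       e.g. an 8-bit mask of 0x0f becomes 0xffffffffffffff0f, leaving bits 63:8
       alone. */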
5458 switch (cbMask)
5459 {
5460 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5461 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5462 break;
5463 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5464 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5465 break;
5466 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5467 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5468 break;
5469 case sizeof(uint64_t):
5470 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5471 break;
5472 default: AssertFailedBreak();
5473 }
5474
5475 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5476
5477#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5478 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5479#endif
5480
5481 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5482 return off;
5483}
5484
5485
5486#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5487 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5488
5489#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5490 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5491
5492#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5493 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5494
5495#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5496 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5497
5498/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5499DECL_INLINE_THROW(uint32_t)
5500iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5501{
5502#ifdef VBOX_STRICT
5503 switch (cbMask)
5504 {
5505 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5506 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5507 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5508 case sizeof(uint64_t): break;
5509 default: AssertFailedBreak();
5510 }
5511#endif
5512
5513 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5514 kIemNativeGstRegUse_ForUpdate);
5515
5516 switch (cbMask)
5517 {
5518 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5519 case sizeof(uint16_t):
5520 case sizeof(uint64_t):
5521 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5522 break;
5523 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5524 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5525 break;
5526 default: AssertFailedBreak();
5527 }
5528
5529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5530
5531#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5533#endif
5534
5535 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5536 return off;
5537}
5538
5539
5540/*********************************************************************************************************************************
5541* Local/Argument variable manipulation (add, sub, and, or). *
5542*********************************************************************************************************************************/
5543
5544#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5545 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5546
5547#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5548 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5549
5550#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5551 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5552
5553#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5554 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5555
5556
5557#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5558 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5559
5560#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5561 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5562
5563#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5564 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5565
5566/** Emits code for AND'ing a local and a constant value. */
5567DECL_INLINE_THROW(uint32_t)
5568iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5569{
5570#ifdef VBOX_STRICT
5571 switch (cbMask)
5572 {
5573 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5574 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5575 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5576 case sizeof(uint64_t): break;
5577 default: AssertFailedBreak();
5578 }
5579#endif
5580
5581 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5582 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5583
5584 if (cbMask <= sizeof(uint32_t))
5585 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5586 else
5587 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5588
5589 iemNativeVarRegisterRelease(pReNative, idxVar);
5590 return off;
5591}
5592
5593
5594#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5595 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5596
5597#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5598 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5599
5600#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5601 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5602
5603#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5604 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5605
5606/** Emits code for OR'ing a local and a constant value. */
5607DECL_INLINE_THROW(uint32_t)
5608iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5609{
5610#ifdef VBOX_STRICT
5611 switch (cbMask)
5612 {
5613 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5614 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5615 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5616 case sizeof(uint64_t): break;
5617 default: AssertFailedBreak();
5618 }
5619#endif
5620
5621 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5622 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5623
5624 if (cbMask <= sizeof(uint32_t))
5625 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5626 else
5627 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5628
5629 iemNativeVarRegisterRelease(pReNative, idxVar);
5630 return off;
5631}
5632
5633
5634#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5635 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5636
5637#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5638 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5639
5640#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5641 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5642
5643/** Emits code for reversing the byte order in a local value. */
5644DECL_INLINE_THROW(uint32_t)
5645iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5646{
5647 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5648 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5649
5650 switch (cbLocal)
5651 {
5652 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5653 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5654 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5655 default: AssertFailedBreak();
5656 }
5657
5658 iemNativeVarRegisterRelease(pReNative, idxVar);
5659 return off;
5660}
5661
5662
5663#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5664 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5665
5666#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5667 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5668
5669#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5670 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5671
5672/** Emits code for shifting left a local value. */
5673DECL_INLINE_THROW(uint32_t)
5674iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5675{
5676#ifdef VBOX_STRICT
5677 switch (cbLocal)
5678 {
5679 case sizeof(uint8_t): Assert(cShift < 8); break;
5680 case sizeof(uint16_t): Assert(cShift < 16); break;
5681 case sizeof(uint32_t): Assert(cShift < 32); break;
5682 case sizeof(uint64_t): Assert(cShift < 64); break;
5683 default: AssertFailedBreak();
5684 }
5685#endif
5686
5687 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5688 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5689
5690 if (cbLocal <= sizeof(uint32_t))
5691 {
5692 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
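        /* The mask below keeps the shift from leaking bits above the local's
           width; e.g. a 16-bit local shifted left by 4 is ANDed with 0xffff so
           nothing above bit 15 survives in the host register. */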
5693 if (cbLocal < sizeof(uint32_t))
5694 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5695 cbLocal == sizeof(uint16_t)
5696 ? UINT32_C(0xffff)
5697 : UINT32_C(0xff));
5698 }
5699 else
5700 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5701
5702 iemNativeVarRegisterRelease(pReNative, idxVar);
5703 return off;
5704}
5705
5706
5707#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5708 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5709
5710#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5711 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5712
5713#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5714 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5715
5716/** Emits code for arithmetically shifting a local value right. */
5717DECL_INLINE_THROW(uint32_t)
5718iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5719{
5720#ifdef VBOX_STRICT
5721 switch (cbLocal)
5722 {
5723 case sizeof(int8_t): Assert(cShift < 8); break;
5724 case sizeof(int16_t): Assert(cShift < 16); break;
5725 case sizeof(int32_t): Assert(cShift < 32); break;
5726 case sizeof(int64_t): Assert(cShift < 64); break;
5727 default: AssertFailedBreak();
5728 }
5729#endif
5730
5731 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5732 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5733
5734 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5735 if (cbLocal == sizeof(uint8_t))
5736 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5737 else if (cbLocal == sizeof(uint16_t))
5738 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
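    /* E.g. an int16_t local holding 0x8000 (-32768) is widened to 0xffff8000
       first, so the arithmetic shift below pulls in the correct sign bits. */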
5739
5740 if (cbLocal <= sizeof(uint32_t))
5741 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5742 else
5743 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5744
5745 iemNativeVarRegisterRelease(pReNative, idxVar);
5746 return off;
5747}
5748
5749
5750#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5751 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5752
5753#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5754 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5755
5756#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5757 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5758
5759/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5760DECL_INLINE_THROW(uint32_t)
5761iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5762{
5763 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5764 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5765 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5766 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5767
5768 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5769 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquireInited(pReNative, idxVarEffAddr, &off);
5770
5771 /* Need to sign extend the value. */
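    /* (E.g. a 16-bit displacement of -2, i.e. 0xfffe, must become
       0xfffffffffffffffe before the 64-bit add, otherwise the effective
       address would end up 0x10000 too large.) */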
5772 if (cbLocal <= sizeof(uint32_t))
5773 {
5774/** @todo ARM64: In case of boredom, the extended add instruction can do the
5775 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5776 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5777
5778 switch (cbLocal)
5779 {
5780 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5781 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5782 default: AssertFailed();
5783 }
5784
5785 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5786 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5787 }
5788 else
5789 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5790
5791 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5792 iemNativeVarRegisterRelease(pReNative, idxVar);
5793 return off;
5794}
5795
5796
5797
5798/*********************************************************************************************************************************
5799* EFLAGS *
5800*********************************************************************************************************************************/
5801
5802#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5803# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5804#else
5805# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5806 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5807
5808DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5809{
5810 if (fEflOutput)
5811 {
5812 PVMCPUCC const pVCpu = pReNative->pVCpu;
5813# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5814 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5815 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5816 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5817# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5818 if (fEflOutput & (a_fEfl)) \
5819 { \
5820 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5821 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5822 else \
5823 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5824 } else do { } while (0)
5825# else
5826 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5827 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5828 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5829# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5830 if (fEflOutput & (a_fEfl)) \
5831 { \
5832 if (LivenessClobbered.a_fLivenessMember) \
5833 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5834 else if (LivenessDelayable.a_fLivenessMember) \
5835 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5836 else \
5837 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5838 } else do { } while (0)
5839# endif
5840 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5841 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5842 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5843 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5844 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5845 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5846 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5847# undef CHECK_FLAG_AND_UPDATE_STATS
5848 }
5849 RT_NOREF(fEflInput);
5850}
5851#endif /* !VBOX_WITH_STATISTICS || !IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5852
5853#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5854#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5855 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5856 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5857
5858/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5859template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5860 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5861DECL_INLINE_THROW(uint32_t)
5862iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5863{
5864 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5865 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5866 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5867
5868#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5869# ifdef VBOX_STRICT
5870 if ( pReNative->idxCurCall != 0
5871 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5872 {
5873 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5874 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5875# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5876 AssertMsg( !(fBoth & (a_fElfConst)) \
5877 || (!(a_fEflInput & (a_fElfConst)) \
5878 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5879 : !(a_fEflOutput & (a_fElfConst)) \
5880 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5881 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5882 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5883 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5884 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5885 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5886 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5887 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5888 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5889 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5890# undef ASSERT_ONE_EFL
5891 }
5892# endif
5893#endif
5894
5895 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5896 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5897
5898 /** @todo This could be prettier...*/
5899 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5900 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5901 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5902 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5903 Assert(pVar->idxReg == UINT8_MAX);
5904 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5905 {
5906 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5907 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5908 * that's counterproductive... */
5909 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5910 a_fLivenessEflInput, a_fLivenessEflOutput);
5911 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5912 }
5913 else
5914 {
5915 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5916 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off);
5917 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5918 a_fLivenessEflInput, a_fLivenessEflOutput);
5919 if (idxGstReg != UINT8_MAX)
5920 {
5921 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5922 iemNativeRegFreeTmp(pReNative, idxGstReg);
5923 }
5924 else
5925 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxVarReg);
5926 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5927 }
5928 return off;
5929}
5930
5931
5932
5933/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5934 * start using it with custom native code emission (inlining assembly
5935 * instruction helpers). */
5936#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5937#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5938 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5939 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5940 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5941 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5942
5943#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5944#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5945 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5946 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5947 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5948 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5949
5950/** Handles IEM_MC_COMMIT_EFLAGS_EX and IEM_MC_COMMIT_EFLAGS_OPT_EX. */
5951template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
5952 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5953DECL_INLINE_THROW(uint32_t)
5954iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
5955{
5956 uint8_t const idxReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarEFlags, &off);
5957 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5958
5959#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5960# ifdef VBOX_STRICT
5961 if ( pReNative->idxCurCall != 0
5962 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
5963 {
5964 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5965# define ASSERT_ONE_EFL(a_idxField) \
5966 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
5967 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
5968 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5969 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
5970 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5971 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
5972 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5973 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
5974 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
5975 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
5976 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
5977 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
5978 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
5979 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
5980# undef ASSERT_ONE_EFL
5981 }
5982# endif
5983#endif
5984
5985#ifdef VBOX_STRICT
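    /* Strict sanity check: bit 1 of EFLAGS must always read as one and the
       reserved-zero bits must be clear; the breakpoint emitted below flags a
       violation of either invariant (0x2001 for the must-be-one bit, 0x2002
       for the reserved bits). */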
5986 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5987 uint32_t offFixup = off;
5988 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5989 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5990 iemNativeFixupFixedJump(pReNative, offFixup, off);
5991
5992 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5993 offFixup = off;
5994 off = iemNativeEmitJzToFixed(pReNative, off, off);
5995 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5996 iemNativeFixupFixedJump(pReNative, offFixup, off);
5997
5998 /** @todo validate that only bits in the a_fEflOutput mask changed. */
5999#endif
6000
6001#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6002 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6003 {
6004 Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6005 if (pReNative->fSkippingEFlags)
6006 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6007 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6008 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6009 pReNative->fSkippingEFlags = 0;
6010 else
6011 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6012# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6013 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6014 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6015 else
6016 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6017 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6018# endif
6019 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6020 }
6021#endif
6022
6023 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6024 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxReg);
6025 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6026 return off;
6027}
6028
6029
6030typedef enum IEMNATIVEMITEFLOP
6031{
6032 kIemNativeEmitEflOp_Set,
6033 kIemNativeEmitEflOp_Clear,
6034 kIemNativeEmitEflOp_Flip
6035} IEMNATIVEMITEFLOP;
6036
6037#define IEM_MC_SET_EFL_BIT(a_fBit) \
6038 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6039
6040#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6041 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6042
6043#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6044 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6045
6046/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6047template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6048DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6049{
6050 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6051 a_enmOp == kIemNativeEmitEflOp_Flip
6052 ? a_fLivenessEflBit : 0,
6053 a_fLivenessEflBit);
6054
6055 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6056 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6057 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6058 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6059 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6060 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6061 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6062 else
6063 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6064 || a_enmOp == kIemNativeEmitEflOp_Clear
6065 || a_enmOp == kIemNativeEmitEflOp_Flip);
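    /* E.g. IEM_MC_SET_EFL_BIT(X86_EFL_CF) boils down to a single OR of the
       cached EFLAGS register with 1; clear and flip map to AND and XOR in the
       same way. */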
6066
6067 /** @todo No delayed writeback for EFLAGS right now. */
6068 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
6069
6070 /* Free but don't flush the EFLAGS register. */
6071 iemNativeRegFreeTmp(pReNative, idxEflReg);
6072
6073#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6074 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6075 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6076 && (a_fEflBit & X86_EFL_STATUS_BITS))
6077 {
6078 if (pReNative->fSkippingEFlags)
6079 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6080 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6081 pReNative->fSkippingEFlags &= ~a_fEflBit;
6082# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6083 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6084# endif
6085 }
6086#endif
6087
6088 return off;
6089}
6090
6091
6092/*********************************************************************************************************************************
6093* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6094*********************************************************************************************************************************/
6095
6096#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6097 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6098
6099#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6100 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6101
6102#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6103 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6104
6105
6106/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6107 * IEM_MC_FETCH_SREG_ZX_U64. */
6108DECL_INLINE_THROW(uint32_t)
6109iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6110{
6111 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6112 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6113 Assert(iSReg < X86_SREG_COUNT);
6114
6115 /*
6116 * For now, we will not create a shadow copy of a selector. The rationale
6117 * is that, since we do not recompile the popping and loading of segment
6118 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
6119 * pushing and moving to registers, there is only a small chance that the
6120 * shadow copy will be accessed again before the register is reloaded. One
6121 * scenario would be nested calls in 16-bit code, but I doubt it's worth
6122 * the extra register pressure atm.
6123 *
6124 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6125 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6126 * store scenario covered at present (r160730).
6127 */
6128 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6129 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6130 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6131 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6132 return off;
6133}
6134
6135
6136
6137/*********************************************************************************************************************************
6138* Register references. *
6139*********************************************************************************************************************************/
6140
6141#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6142 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6143
6144#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6145 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6146
6147/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6148DECL_INLINE_THROW(uint32_t)
6149iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6150{
6151 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6152 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6153 Assert(iGRegEx < 20);
6154
6155 if (iGRegEx < 16)
6156 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6157 else
6158 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6159
6160 /* If we've delayed writing back the register value, flush it now. */
6161 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGRegEx & 15);
6162
6163 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6164 if (!fConst)
6165 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6166
6167 return off;
6168}
6169
6170#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6171 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6172
6173#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6174 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6175
6176#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6177 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6178
6179#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6180 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6181
6182#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6183 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6184
6185#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6186 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6187
6188#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6189 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6190
6191#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6192 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6193
6194#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6195 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6196
6197#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6198 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6199
6200/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6201DECL_INLINE_THROW(uint32_t)
6202iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6203{
6204 Assert(iGReg < 16);
6205 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6207
6208 /* If we've delayed writing back the register value, flush it now. */
6209 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGReg);
6210
6211 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6212 if (!fConst)
6213 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6214
6215 return off;
6216}
6217
6218
6219#undef IEM_MC_REF_EFLAGS /* should not be used. */
6220#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6221 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6222 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6223
6224/** Handles IEM_MC_REF_EFLAGS. */
6225template<uint32_t const a_fEflOutput>
6226DECL_INLINE_THROW(uint32_t)
6227iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6228{
6229 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6230 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6231
6232#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6233 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6234 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6235 if (pReNative->fSkippingEFlags)
6236 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6237 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6238 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6239# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6240
6241 /* Updating the skipping according to the outputs is a little early, but
6242 we don't have any other hooks for references atm. */
6243 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6244 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6245 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6246 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6247 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6248# endif
6249
6250 /* This ASSUMES that EFLAGS references are not taken before use. */
6251 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6252
6253#endif
6254 RT_NOREF(fEflInput);
6255
6256 /* If we've delayed writing back the register value, flush it now. */
6257 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_EFlags>(pReNative, off, 0);
6258
6259 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6260 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6261
6262 return off;
6263}
6264
6265
6266/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6267 * different code from the threaded recompiler, maybe it would be helpful.  For now
6268 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6269#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6270
6271
6272#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6273 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6274
6275#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6276 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6277
6278#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6279 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6280
6281#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6282 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6283
6284/* Just being paranoid here. */
6285#ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6286AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6287AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6288AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6289AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6290#endif
6291AssertCompileMemberOffset(X86XMMREG, au64, 0);
6292AssertCompileMemberOffset(X86XMMREG, au32, 0);
6293AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6294AssertCompileMemberOffset(X86XMMREG, ar32, 0);
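/* I.e. all the X86XMMREG union views start at offset zero, so the same XREG reference
   works for the U32/U64/R32/R64 const accessors below. */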
6295
6296#define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6297 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6298#define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6299 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6300#define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6301 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6302#define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6303 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6304
6305/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6306DECL_INLINE_THROW(uint32_t)
6307iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6308{
6309 Assert(iXReg < 16);
6310 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6311 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6312
6313 /* If we've delayed writing back the register value, flush it now. */
6314 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_XReg>(pReNative, off, iXReg);
6315
6316 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6317 if (!fConst)
6318 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6319
6320 return off;
6321}
6322
6323
6324
6325/*********************************************************************************************************************************
6326* Effective Address Calculation *
6327*********************************************************************************************************************************/
6328#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6329 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6330
6331/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6332 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6333DECL_INLINE_THROW(uint32_t)
6334iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6335 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6336{
6337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6338
6339 /*
6340 * Handle the disp16 form with no registers first.
6341 *
6342 * Convert to an immediate value, as that'll delay the register allocation
6343 * and assignment till the memory access / call / whatever and we can use
6344 * a more appropriate register (or none at all).
6345 */
6346 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6347 {
6348 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6349 return off;
6350 }
6351
6352     /* Determine the displacement. */
6353 uint16_t u16EffAddr;
6354 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6355 {
6356 case 0: u16EffAddr = 0; break;
6357 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6358 case 2: u16EffAddr = u16Disp; break;
6359 default: AssertFailedStmt(u16EffAddr = 0);
6360 }
6361
6362 /* Determine the registers involved. */
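    /* The classic 16-bit addressing forms are:
           r/m=0: [BX+SI]   r/m=1: [BX+DI]   r/m=2: [BP+SI]   r/m=3: [BP+DI]
           r/m=4: [SI]      r/m=5: [DI]      r/m=6: [BP]      r/m=7: [BX]
       (mod=0 with r/m=6 is the disp16-only form already handled above). */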
6363 uint8_t idxGstRegBase;
6364 uint8_t idxGstRegIndex;
6365 switch (bRm & X86_MODRM_RM_MASK)
6366 {
6367 case 0:
6368 idxGstRegBase = X86_GREG_xBX;
6369 idxGstRegIndex = X86_GREG_xSI;
6370 break;
6371 case 1:
6372 idxGstRegBase = X86_GREG_xBX;
6373 idxGstRegIndex = X86_GREG_xDI;
6374 break;
6375 case 2:
6376 idxGstRegBase = X86_GREG_xBP;
6377 idxGstRegIndex = X86_GREG_xSI;
6378 break;
6379 case 3:
6380 idxGstRegBase = X86_GREG_xBP;
6381 idxGstRegIndex = X86_GREG_xDI;
6382 break;
6383 case 4:
6384 idxGstRegBase = X86_GREG_xSI;
6385 idxGstRegIndex = UINT8_MAX;
6386 break;
6387 case 5:
6388 idxGstRegBase = X86_GREG_xDI;
6389 idxGstRegIndex = UINT8_MAX;
6390 break;
6391 case 6:
6392 idxGstRegBase = X86_GREG_xBP;
6393 idxGstRegIndex = UINT8_MAX;
6394 break;
6395#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6396 default:
6397#endif
6398 case 7:
6399 idxGstRegBase = X86_GREG_xBX;
6400 idxGstRegIndex = UINT8_MAX;
6401 break;
6402 }
6403
6404 /*
6405 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6406 */
6407 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6408 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6409 kIemNativeGstRegUse_ReadOnly);
6410 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6411 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6412 kIemNativeGstRegUse_ReadOnly)
6413 : UINT8_MAX;
6414#ifdef RT_ARCH_AMD64
6415 if (idxRegIndex == UINT8_MAX)
6416 {
6417 if (u16EffAddr == 0)
6418 {
6419             /* movzx ret, base */
6420 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6421 }
6422 else
6423 {
6424 /* lea ret32, [base64 + disp32] */
6425 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6426 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6427 if (idxRegRet >= 8 || idxRegBase >= 8)
6428 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6429 pbCodeBuf[off++] = 0x8d;
6430 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6431 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6432 else
6433 {
6434 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6435 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6436 }
6437 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6438 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6439 pbCodeBuf[off++] = 0;
6440 pbCodeBuf[off++] = 0;
6441 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6442
6443 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6444 }
6445 }
6446 else
6447 {
6448 /* lea ret32, [index64 + base64 (+ disp32)] */
6449 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6450 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6451 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6452 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6453 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6454 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6455 pbCodeBuf[off++] = 0x8d;
6456 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6457 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6458 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6459 if (bMod == X86_MOD_MEM4)
6460 {
6461 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6462 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6463 pbCodeBuf[off++] = 0;
6464 pbCodeBuf[off++] = 0;
6465 }
6466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6467 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6468 }
6469
6470#elif defined(RT_ARCH_ARM64)
6471 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6472 if (u16EffAddr == 0)
6473 {
6474 if (idxRegIndex == UINT8_MAX)
6475 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6476 else
6477 {
6478 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6479 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6480 }
6481 }
6482 else
6483 {
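        /* ARM64 ADD/SUB with a 12-bit unsigned immediate covers displacements in the
           +/-4095 range; larger values are materialized with MOVZ and added as a register. */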
6484 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6485 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6486 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6487 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6488 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6489 else
6490 {
6491 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6492 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6493 }
6494 if (idxRegIndex != UINT8_MAX)
6495 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6496 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6497 }
6498
6499#else
6500# error "port me"
6501#endif
6502
6503 if (idxRegIndex != UINT8_MAX)
6504 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6505 iemNativeRegFreeTmp(pReNative, idxRegBase);
6506 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6507 return off;
6508}
6509
6510
6511#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6512 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6513
6514/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6515 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6516DECL_INLINE_THROW(uint32_t)
6517iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6518 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6519{
6520 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6521
6522 /*
6523 * Handle the disp32 form with no registers first.
6524 *
6525 * Convert to an immediate value, as that'll delay the register allocation
6526 * and assignment till the memory access / call / whatever and we can use
6527 * a more appropriate register (or none at all).
6528 */
6529 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6530 {
6531 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6532 return off;
6533 }
6534
6535     /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
6536 uint32_t u32EffAddr = 0;
6537 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6538 {
6539 case 0: break;
6540 case 1: u32EffAddr = (int8_t)u32Disp; break;
6541 case 2: u32EffAddr = u32Disp; break;
6542 default: AssertFailed();
6543 }
6544
6545 /* Get the register (or SIB) value. */
6546 uint8_t idxGstRegBase = UINT8_MAX;
6547 uint8_t idxGstRegIndex = UINT8_MAX;
6548 uint8_t cShiftIndex = 0;
6549 switch (bRm & X86_MODRM_RM_MASK)
6550 {
6551 case 0: idxGstRegBase = X86_GREG_xAX; break;
6552 case 1: idxGstRegBase = X86_GREG_xCX; break;
6553 case 2: idxGstRegBase = X86_GREG_xDX; break;
6554 case 3: idxGstRegBase = X86_GREG_xBX; break;
6555 case 4: /* SIB */
6556 {
6557             /* index w/ scaling. */
6558 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6559 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6560 {
6561 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6562 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6563 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6564 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6565 case 4: cShiftIndex = 0; /*no index*/ break;
6566 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6567 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6568 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6569 }
6570
6571 /* base */
6572 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6573 {
6574 case 0: idxGstRegBase = X86_GREG_xAX; break;
6575 case 1: idxGstRegBase = X86_GREG_xCX; break;
6576 case 2: idxGstRegBase = X86_GREG_xDX; break;
6577 case 3: idxGstRegBase = X86_GREG_xBX; break;
6578 case 4:
6579 idxGstRegBase = X86_GREG_xSP;
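                    /* The second byte of uSibAndRspOffset is the fixed ESP bias for the
                       'pop [esp]' special case (see the parameter docs of the 64-bit
                       variant further down). */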
6580 u32EffAddr += uSibAndRspOffset >> 8;
6581 break;
6582 case 5:
6583 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6584 idxGstRegBase = X86_GREG_xBP;
6585 else
6586 {
6587 Assert(u32EffAddr == 0);
6588 u32EffAddr = u32Disp;
6589 }
6590 break;
6591 case 6: idxGstRegBase = X86_GREG_xSI; break;
6592 case 7: idxGstRegBase = X86_GREG_xDI; break;
6593 }
6594 break;
6595 }
6596 case 5: idxGstRegBase = X86_GREG_xBP; break;
6597 case 6: idxGstRegBase = X86_GREG_xSI; break;
6598 case 7: idxGstRegBase = X86_GREG_xDI; break;
6599 }
6600
6601 /*
6602 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6603 * the start of the function.
6604 */
6605 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6606 {
6607 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6608 return off;
6609 }
6610
6611 /*
6612 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6613 */
6614 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6615 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6616 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6617 kIemNativeGstRegUse_ReadOnly);
6618 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6619 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6620 kIemNativeGstRegUse_ReadOnly);
6621
6622 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6623 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6624 {
6625 idxRegBase = idxRegIndex;
6626 idxRegIndex = UINT8_MAX;
6627 }
6628
6629#ifdef RT_ARCH_AMD64
6630 if (idxRegIndex == UINT8_MAX)
6631 {
6632 if (u32EffAddr == 0)
6633 {
6634 /* mov ret, base */
6635 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6636 }
6637 else
6638 {
6639 /* lea ret32, [base64 + disp32] */
6640 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6641 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6642 if (idxRegRet >= 8 || idxRegBase >= 8)
6643 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6644 pbCodeBuf[off++] = 0x8d;
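            /* Use the disp8 encoding when the displacement fits in a signed byte, else disp32. */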
6645 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6646 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6647 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6648 else
6649 {
6650 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6651 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6652 }
6653 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6654 if (bMod == X86_MOD_MEM4)
6655 {
6656 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6657 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6658 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6659 }
6660 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6661 }
6662 }
6663 else
6664 {
6665 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6666 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6667 if (idxRegBase == UINT8_MAX)
6668 {
6669 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6670 if (idxRegRet >= 8 || idxRegIndex >= 8)
6671 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6672 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6673 pbCodeBuf[off++] = 0x8d;
6674 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6675 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6676 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6677 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6678 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6679 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6680 }
6681 else
6682 {
6683 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6684 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6685 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6686 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6687 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6688 pbCodeBuf[off++] = 0x8d;
6689 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6690 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6691 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6692 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6693 if (bMod != X86_MOD_MEM0)
6694 {
6695 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6696 if (bMod == X86_MOD_MEM4)
6697 {
6698 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6699 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6700 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6701 }
6702 }
6703 }
6704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6705 }
6706
6707#elif defined(RT_ARCH_ARM64)
6708 if (u32EffAddr == 0)
6709 {
6710 if (idxRegIndex == UINT8_MAX)
6711 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6712 else if (idxRegBase == UINT8_MAX)
6713 {
6714 if (cShiftIndex == 0)
6715 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6716 else
6717 {
6718 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6719 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6720 }
6721 }
6722 else
6723 {
6724 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6725 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6726 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6727 }
6728 }
6729 else
6730 {
6731 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6732 {
6733 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6734 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6735 }
6736 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6737 {
6738 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6739 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6740 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6741 }
6742 else
6743 {
6744 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6745 if (idxRegBase != UINT8_MAX)
6746 {
6747 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6748 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6749 }
6750 }
6751 if (idxRegIndex != UINT8_MAX)
6752 {
6753 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6754 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6755 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6756 }
6757 }
6758
6759#else
6760# error "port me"
6761#endif
6762
6763 if (idxRegIndex != UINT8_MAX)
6764 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6765 if (idxRegBase != UINT8_MAX)
6766 iemNativeRegFreeTmp(pReNative, idxRegBase);
6767 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6768 return off;
6769}
6770
6771
6772#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6773 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6774 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6775
6776#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6777 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6778 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6779
6780#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6781 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6782 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6783
6784/**
6785 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6786 *
6787 * @returns New off.
6788  * @param   pReNative           The native recompiler state.
6789  * @param   off                 The current code buffer offset.
6790 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6791 * bit 4 to REX.X. The two bits are part of the
6792 * REG sub-field, which isn't needed in this
6793 * function.
6794 * @param uSibAndRspOffset Two parts:
6795 * - The first 8 bits make up the SIB byte.
6796 * - The next 8 bits are the fixed RSP/ESP offset
6797 * in case of a pop [xSP].
6798 * @param u32Disp The displacement byte/word/dword, if any.
6799 * @param cbInstr The size of the fully decoded instruction. Used
6800 * for RIP relative addressing.
6801 * @param idxVarRet The result variable number.
6802 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6803 * when calculating the address.
6804 *
6805 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6806 */
6807DECL_INLINE_THROW(uint32_t)
6808iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6809 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6810{
6811 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6812
6813 /*
6814 * Special case the rip + disp32 form first.
6815 */
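    /* In 64-bit mode, mod=0 with r/m=5 selects RIP-relative addressing: the effective address
       is the address of the *next* instruction plus disp32, hence cbInstr is added to the
       displacement below (the !f64Bit case truncates the result to 32 bits). */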
6816 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6817 {
6818 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6819 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6820 kIemNativeGstRegUse_ReadOnly);
6821 if (f64Bit)
6822 {
6823#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6824 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6825#else
6826 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6827#endif
6828#ifdef RT_ARCH_AMD64
6829 if ((int32_t)offFinalDisp == offFinalDisp)
6830 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6831 else
6832 {
6833 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6834 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6835 }
6836#else
6837 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6838#endif
6839 }
6840 else
6841 {
6842# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6843 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6844# else
6845 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6846# endif
6847 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6848 }
6849 iemNativeRegFreeTmp(pReNative, idxRegPc);
6850 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6851 return off;
6852 }
6853
6854     /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
6855 int64_t i64EffAddr = 0;
6856 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6857 {
6858 case 0: break;
6859 case 1: i64EffAddr = (int8_t)u32Disp; break;
6860 case 2: i64EffAddr = (int32_t)u32Disp; break;
6861 default: AssertFailed();
6862 }
6863
6864 /* Get the register (or SIB) value. */
6865 uint8_t idxGstRegBase = UINT8_MAX;
6866 uint8_t idxGstRegIndex = UINT8_MAX;
6867 uint8_t cShiftIndex = 0;
6868 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6869 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6870 else /* SIB: */
6871 {
6872         /* index w/ scaling. */
6873 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6874 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6875 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6876 if (idxGstRegIndex == 4)
6877 {
6878 /* no index */
6879 cShiftIndex = 0;
6880 idxGstRegIndex = UINT8_MAX;
6881 }
6882
6883 /* base */
6884 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6885 if (idxGstRegBase == 4)
6886 {
6887 /* pop [rsp] hack */
6888 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6889 }
6890 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6891 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6892 {
6893 /* mod=0 and base=5 -> disp32, no base reg. */
6894 Assert(i64EffAddr == 0);
6895 i64EffAddr = (int32_t)u32Disp;
6896 idxGstRegBase = UINT8_MAX;
6897 }
6898 }
6899
6900 /*
6901 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6902 * the start of the function.
6903 */
6904 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6905 {
6906 if (f64Bit)
6907 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6908 else
6909 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6910 return off;
6911 }
6912
6913 /*
6914 * Now emit code that calculates:
6915 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6916 * or if !f64Bit:
6917 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6918 */
6919 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6920 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6921 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6922 kIemNativeGstRegUse_ReadOnly);
6923 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6924 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6925 kIemNativeGstRegUse_ReadOnly);
6926
6927 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6928 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6929 {
6930 idxRegBase = idxRegIndex;
6931 idxRegIndex = UINT8_MAX;
6932 }
6933
6934#ifdef RT_ARCH_AMD64
6935 uint8_t bFinalAdj;
6936 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6937 bFinalAdj = 0; /* likely */
6938 else
6939 {
6940 /* pop [rsp] with a problematic disp32 value. Split out the
6941 RSP offset and add it separately afterwards (bFinalAdj). */
6942 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6943 Assert(idxGstRegBase == X86_GREG_xSP);
6944 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6945 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6946 Assert(bFinalAdj != 0);
6947 i64EffAddr -= bFinalAdj;
6948 Assert((int32_t)i64EffAddr == i64EffAddr);
6949 }
6950 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6951//pReNative->pInstrBuf[off++] = 0xcc;
6952
6953 if (idxRegIndex == UINT8_MAX)
6954 {
6955 if (u32EffAddr == 0)
6956 {
6957 /* mov ret, base */
6958 if (f64Bit)
6959 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6960 else
6961 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6962 }
6963 else
6964 {
6965 /* lea ret, [base + disp32] */
6966 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6967 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6968 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6969 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6970 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6971 | (f64Bit ? X86_OP_REX_W : 0);
6972 pbCodeBuf[off++] = 0x8d;
6973 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6974 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6975 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6976 else
6977 {
6978 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6979 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6980 }
6981 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6982 if (bMod == X86_MOD_MEM4)
6983 {
6984 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6985 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6986 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6987 }
6988 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6989 }
6990 }
6991 else
6992 {
6993 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6994 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6995 if (idxRegBase == UINT8_MAX)
6996 {
6997 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6998 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6999 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7000 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7001 | (f64Bit ? X86_OP_REX_W : 0);
7002 pbCodeBuf[off++] = 0x8d;
7003 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7004 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7005 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7006 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7007 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7008 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7009 }
7010 else
7011 {
7012 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7013 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7014 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7015 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7016 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7017 | (f64Bit ? X86_OP_REX_W : 0);
7018 pbCodeBuf[off++] = 0x8d;
7019 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7020 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7021 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7022 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7023 if (bMod != X86_MOD_MEM0)
7024 {
7025 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7026 if (bMod == X86_MOD_MEM4)
7027 {
7028 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7029 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7030 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7031 }
7032 }
7033 }
7034 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7035 }
7036
7037 if (!bFinalAdj)
7038 { /* likely */ }
7039 else
7040 {
7041 Assert(f64Bit);
7042 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7043 }
7044
7045#elif defined(RT_ARCH_ARM64)
7046 if (i64EffAddr == 0)
7047 {
7048 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7049 if (idxRegIndex == UINT8_MAX)
7050 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7051 else if (idxRegBase != UINT8_MAX)
7052 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7053 f64Bit, false /*fSetFlags*/, cShiftIndex);
7054 else
7055 {
7056 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7057 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7058 }
7059 }
7060 else
7061 {
7062 if (f64Bit)
7063 { /* likely */ }
7064 else
7065 i64EffAddr = (int32_t)i64EffAddr;
7066
7067 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7068 {
7069 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7070 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7071 }
7072 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7073 {
7074 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7075 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7076 }
7077 else
7078 {
7079 if (f64Bit)
7080 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7081 else
7082 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7083 if (idxRegBase != UINT8_MAX)
7084 {
7085 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7086 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7087 }
7088 }
7089 if (idxRegIndex != UINT8_MAX)
7090 {
7091 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7092 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7093 f64Bit, false /*fSetFlags*/, cShiftIndex);
7094 }
7095 }
7096
7097#else
7098# error "port me"
7099#endif
7100
7101 if (idxRegIndex != UINT8_MAX)
7102 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7103 if (idxRegBase != UINT8_MAX)
7104 iemNativeRegFreeTmp(pReNative, idxRegBase);
7105 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7106 return off;
7107}
7108
7109
7110/*********************************************************************************************************************************
7111* Memory fetches and stores common *
7112*********************************************************************************************************************************/
7113
7114typedef enum IEMNATIVEMITMEMOP
7115{
7116 kIemNativeEmitMemOp_Store = 0,
7117 kIemNativeEmitMemOp_Fetch,
7118 kIemNativeEmitMemOp_Fetch_Zx_U16,
7119 kIemNativeEmitMemOp_Fetch_Zx_U32,
7120 kIemNativeEmitMemOp_Fetch_Zx_U64,
7121 kIemNativeEmitMemOp_Fetch_Sx_U16,
7122 kIemNativeEmitMemOp_Fetch_Sx_U32,
7123 kIemNativeEmitMemOp_Fetch_Sx_U64
7124} IEMNATIVEMITMEMOP;
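/* Note: the _Zx_/_Sx_ fetch variants zero-/sign-extend the fetched value to the indicated
   destination width; plain Fetch and Store use the access size as-is. */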
7125
7126/** Emits code for IEM_MC_FETCH_MEM_SEG_U8/16/32/64 and IEM_MC_STORE_MEM_SEG_U8/16/32/64,
7127 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7128 * (with iSegReg = UINT8_MAX). */
7129template<uint8_t const a_cbMem, uint32_t const a_fAlignMaskAndCtl, IEMNATIVEMITMEMOP const a_enmOp, bool a_fFlat = false>
7130DECL_INLINE_THROW(uint32_t)
7131iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7132 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7133{
7134 /*
7135 * Assert sanity.
7136 */
7137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7138 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7139 Assert( a_enmOp != kIemNativeEmitMemOp_Store
7140 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7141 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7142 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7143 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7144 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7145 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7146 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7147 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
7148 AssertCompile( a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8
7149 || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U));
7150 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7151 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7152#ifdef VBOX_STRICT
7153 if (iSegReg == UINT8_MAX)
7154 {
7155 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7156 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7157 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7158 switch (a_cbMem)
7159 {
7160 case 1:
7161 Assert( pfnFunction
7162 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7163 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7164 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7165 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7166 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7167 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7168 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7169 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7170 : UINT64_C(0xc000b000a0009000) ));
7171 Assert(!a_fAlignMaskAndCtl);
7172 break;
7173 case 2:
7174 Assert( pfnFunction
7175 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7176 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7177 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7178 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7179 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7180 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7181 : UINT64_C(0xc000b000a0009000) ));
7182 Assert(a_fAlignMaskAndCtl <= 1);
7183 break;
7184 case 4:
7185 Assert( pfnFunction
7186 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7187 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7188 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7189 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7190 : UINT64_C(0xc000b000a0009000) ));
7191 Assert(a_fAlignMaskAndCtl <= 3);
7192 break;
7193 case 8:
7194 Assert( pfnFunction
7195 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7196 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7197 : UINT64_C(0xc000b000a0009000) ));
7198 Assert(a_fAlignMaskAndCtl <= 7);
7199 break;
7200 case sizeof(RTUINT128U):
7201 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7202 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7203 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7204 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7205 || ( a_enmOp == kIemNativeEmitMemOp_Store
7206 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7207 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7208 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7209 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7210 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7211 : a_fAlignMaskAndCtl <= 15U);
7212 break;
7213 case sizeof(RTUINT256U):
7214 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7215 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7216 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7217 || ( a_enmOp == kIemNativeEmitMemOp_Store
7218 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7219 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7220 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7221 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7222 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7223 : a_fAlignMaskAndCtl <= 31);
7224 break;
7225 }
7226 }
7227 else
7228 {
7229 Assert(iSegReg < 6);
7230 switch (a_cbMem)
7231 {
7232 case 1:
7233 Assert( pfnFunction
7234 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7235 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7236 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7237 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7238 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7239 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7240 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7241 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7242 : UINT64_C(0xc000b000a0009000) ));
7243 Assert(!a_fAlignMaskAndCtl);
7244 break;
7245 case 2:
7246 Assert( pfnFunction
7247 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7248 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7249 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7250 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7251 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7252 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7253 : UINT64_C(0xc000b000a0009000) ));
7254 Assert(a_fAlignMaskAndCtl <= 1);
7255 break;
7256 case 4:
7257 Assert( pfnFunction
7258 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7259 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7260 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7261 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7262 : UINT64_C(0xc000b000a0009000) ));
7263 Assert(a_fAlignMaskAndCtl <= 3);
7264 break;
7265 case 8:
7266 Assert( pfnFunction
7267 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7268 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7269 : UINT64_C(0xc000b000a0009000) ));
7270 Assert(a_fAlignMaskAndCtl <= 7);
7271 break;
7272 case sizeof(RTUINT128U):
7273 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7274 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7275 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7276 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7277 || ( a_enmOp == kIemNativeEmitMemOp_Store
7278 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7279 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7280 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7281 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7282 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7283 : a_fAlignMaskAndCtl <= 15);
7284 break;
7285 case sizeof(RTUINT256U):
7286 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7287 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7288 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7289 || ( a_enmOp == kIemNativeEmitMemOp_Store
7290 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7291 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7292 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7293 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7294 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7295 : a_fAlignMaskAndCtl <= 31);
7296 break;
7297 }
7298 }
7299#endif
7300
7301#ifdef VBOX_STRICT
7302 /*
7303 * Check that the fExec flags we've got make sense.
7304 */
7305 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7306#endif
7307
7308 /*
7309 * To keep things simple we have to commit any pending writes first as we
7310 * may end up making calls.
7311 */
7312 /** @todo we could postpone this till we make the call and reload the
7313 * registers after returning from the call. Not sure if that's sensible or
7314 * not, though. */
7315#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7316 off = iemNativeRegFlushPendingWrites(pReNative, off);
7317#else
7318 /* The program counter is treated differently for now. */
7319 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7320#endif
7321
7322#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7323 /*
7324 * Move/spill/flush stuff out of call-volatile registers.
7325 * This is the easy way out. We could contain this to the tlb-miss branch
7326 * by saving and restoring active stuff here.
7327 */
7328 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7329#endif
7330
7331 /*
7332 * Define labels and allocate the result register (trying for the return
7333 * register if we can).
7334 */
7335 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7336 RT_CONSTEXPR
7337 bool const fSimdRegValues = a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U);
7338 uint8_t const idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7339 : fSimdRegValues
7340 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off)
7341 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7342 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7343 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7344 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem, offDisp);
7345 uint8_t const idxRegValueStore = a_enmOp != kIemNativeEmitMemOp_Store
7346 || TlbState.fSkip
7347 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7348 ? UINT8_MAX
7349 : fSimdRegValues
7350 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7351 : iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off);
7352 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7353 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7354 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7355 : UINT32_MAX;
7356
7357 /*
7358 * Jump to the TLB lookup code.
7359 */
7360 if (!TlbState.fSkip)
7361 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7362
7363 /*
7364 * TlbMiss:
7365 *
7366 * Call helper to do the fetching.
7367 * We flush all guest register shadow copies here.
7368 */
7369 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7370
7371#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7372 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7373#else
7374 RT_NOREF(idxInstr);
7375#endif
7376
7377#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7378 if (pReNative->Core.offPc)
7379 {
7380 /*
7381 * Update the program counter but restore it at the end of the TlbMiss branch.
7382 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7383 * which are hopefully much more frequent, reducing the amount of memory accesses.
7384 */
7385 /* Allocate a temporary PC register. */
7386/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7387 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7388 kIemNativeGstRegUse_ForUpdate);
7389
7390 /* Perform the addition and store the result. */
7391 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7392 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7393# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7394 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7395# endif
7396
7397 /* Free and flush the PC register. */
7398 iemNativeRegFreeTmp(pReNative, idxPcReg);
7399 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7400 }
7401#endif
7402
7403#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7404 /* Save variables in volatile registers. */
7405 uint32_t const fHstGprsNotToSave = TlbState.getRegsNotToSave()
7406 | (idxRegMemResult < 32 ? RT_BIT_32(idxRegMemResult) : 0)
7407#ifdef _MSC_VER /* Workaround for stupid compiler (2019). */
7408 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch & 0x1f) : 0);
7409#else
7410 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch) : 0);
7411#endif
7412 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstGprsNotToSave);
7413#endif
7414
7415 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7416 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7417 if RT_CONSTEXPR_IF(fSimdRegValues)
7418 {
7419 /*
7420 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7421 *
7422          * Note! A host register was assigned to the variable for the TlbLookup case above; it
7423          * must not be freed here, or the value will not be synced into that register further
7424          * down the road because the variable no longer knows it has a register assigned.
7425 *
7426 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7427 * as it will be overwritten anyway.
7428 */
7429 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7430 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7431 a_enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7432 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7433 }
7434 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store)
7435 {
7436 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7437 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7438#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7439 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7440#else
7441 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7442 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7443#endif
7444 }
7445
7446 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7447 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7448#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7449 fVolGregMask);
7450#else
7451 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7452#endif
7453
7454 if RT_CONSTEXPR_IF(!a_fFlat)
7455 {
7456 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7457 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7458 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7459 }
7460
7461#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7462 /* Do delayed EFLAGS calculations. */
7463 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store || fSimdRegValues)
7464 {
7465 if RT_CONSTEXPR_IF(a_fFlat)
7466 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7467 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7468 fHstGprsNotToSave);
7469 else
7470 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7471 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7472 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7473 fHstGprsNotToSave);
7474 }
7475 else if RT_CONSTEXPR_IF(a_fFlat)
7476 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7477 fHstGprsNotToSave);
7478 else
7479 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7480 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7481 fHstGprsNotToSave);
7482#endif
7483
7484 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7485 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7486
7487 /* Done setting up parameters, make the call. */
7488 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7489
7490 /*
7491 * Put the result in the right register if this is a fetch.
7492 */
7493 if RT_CONSTEXPR_IF(a_enmOp != kIemNativeEmitMemOp_Store)
7494 {
7495 if RT_CONSTEXPR_IF(fSimdRegValues)
7496 {
7497 Assert(a_enmOp == kIemNativeEmitMemOp_Fetch);
7498
7499 /* Sync the value on the stack with the host register assigned to the variable. */
7500 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7501 }
7502 else
7503 {
7504 Assert(idxRegValueFetch == pVarValue->idxReg);
7505 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7506 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7507 }
7508 }
7509
7510#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7511 /* Restore variables and guest shadow registers to volatile registers. */
7512 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstGprsNotToSave);
7513 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7514#endif
7515
7516#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7517 if (pReNative->Core.offPc)
7518 {
7519 /*
7520 * Time to restore the program counter to its original value.
7521 */
7522 /* Allocate a temporary PC register. */
7523 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7524 kIemNativeGstRegUse_ForUpdate);
7525
7526 /* Restore the original value. */
7527 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7528 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7529
7530 /* Free and flush the PC register. */
7531 iemNativeRegFreeTmp(pReNative, idxPcReg);
7532 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7533 }
7534#endif
7535
7536#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7537 if (!TlbState.fSkip)
7538 {
7539 /* end of TlbMiss - Jump to the done label. */
7540 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7541 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7542
7543 /*
7544 * TlbLookup:
7545 */
7546 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl,
7547 a_enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ
7548 >(pReNative, off, &TlbState, iSegReg, idxLabelTlbLookup, idxLabelTlbMiss,
7549 idxRegMemResult, offDisp);
7550
7551 /*
7552 * Emit code to do the actual storing / fetching.
7553 */
7554 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7555# ifdef IEM_WITH_TLB_STATISTICS
7556 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7557 a_enmOp == kIemNativeEmitMemOp_Store
7558 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
7559 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
7560# endif
7561 switch (a_enmOp)
7562 {
7563 case kIemNativeEmitMemOp_Store:
7564 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7565 {
7566 switch (a_cbMem)
7567 {
7568 case 1:
7569 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7570 break;
7571 case 2:
7572 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7573 break;
7574 case 4:
7575 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7576 break;
7577 case 8:
7578 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7579 break;
7580 case sizeof(RTUINT128U):
7581 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7582 break;
7583 case sizeof(RTUINT256U):
7584 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7585 break;
7586 default:
7587 AssertFailed();
7588 }
7589 }
7590 else
7591 {
7592 switch (a_cbMem)
7593 {
7594 case 1:
7595 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7596 idxRegMemResult, TlbState.idxReg1);
7597 break;
7598 case 2:
7599 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7600 idxRegMemResult, TlbState.idxReg1);
7601 break;
7602 case 4:
7603 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7604 idxRegMemResult, TlbState.idxReg1);
7605 break;
7606 case 8:
7607 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7608 idxRegMemResult, TlbState.idxReg1);
7609 break;
7610 default:
7611 AssertFailed();
7612 }
7613 }
7614 break;
7615
7616 case kIemNativeEmitMemOp_Fetch:
7617 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7618 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7619 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7620 switch (a_cbMem)
7621 {
7622 case 1:
7623 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7624 break;
7625 case 2:
7626 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7627 break;
7628 case 4:
7629 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7630 break;
7631 case 8:
7632 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7633 break;
7634 case sizeof(RTUINT128U):
7635 /*
7636 * No need to sync the register back to the stack; this is done by the generic variable handling
7637 * code if a register is assigned to the variable and the stack must be accessed.
7638 */
7639 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7640 break;
7641 case sizeof(RTUINT256U):
7642 /*
7643 * No need to sync the register back to the stack; this is done by the generic variable handling
7644 * code if a register is assigned to the variable and the stack must be accessed.
7645 */
7646 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7647 break;
7648 default:
7649 AssertFailed();
7650 }
7651 break;
7652
7653 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7654 Assert(a_cbMem == 1);
7655 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7656 break;
7657
7658 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7659 Assert(a_cbMem == 1 || a_cbMem == 2);
7660 if (a_cbMem == 1)
7661 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7662 else
7663 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7664 break;
7665
7666 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7667 switch (a_cbMem)
7668 {
7669 case 1:
7670 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7671 break;
7672 case 2:
7673 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7674 break;
7675 case 4:
7676 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7677 break;
7678 default:
7679 AssertFailed();
7680 }
7681 break;
7682
7683 default:
7684 AssertFailed();
7685 }
7686
7687 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7688
7689 /*
7690 * TlbDone:
7691 */
7692 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7693
7694 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7695
7696# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7697 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7698 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7699# endif
7700 }
7701#else
7702 RT_NOREF(idxLabelTlbMiss);
7703#endif
7704
7705 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7706 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7707 return off;
7708}
7709
7710
7711
7712/*********************************************************************************************************************************
7713* Memory fetches (IEM_MEM_FETCH_XXX). *
7714*********************************************************************************************************************************/
7715
7716/* 8-bit segmented: */
7717#define IEM_MC_FETCH_MEM_SEG_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7718 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch>( \
7719 pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7720
7721#define IEM_MC_FETCH_MEM_SEG_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7722 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16>( \
7723 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7724
7725#define IEM_MC_FETCH_MEM_SEG_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7726 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32>( \
7727 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7728
7729#define IEM_MC_FETCH_MEM_SEG_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7730 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64>( \
7731 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7732
7733#define IEM_MC_FETCH_MEM_SEG_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7734 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16>(\
7735 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7736
7737#define IEM_MC_FETCH_MEM_SEG_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7738 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7739 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7740
7741#define IEM_MC_FETCH_MEM_SEG_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7742 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7743 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7744
7745/* 16-bit segmented: */
7746#define IEM_MC_FETCH_MEM_SEG_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7747 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7748 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7749
7750#define IEM_MC_FETCH_MEM_SEG_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7751 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7752 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7753
7754#define IEM_MC_FETCH_MEM_SEG_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7755 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32>(\
7756 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7757
7758#define IEM_MC_FETCH_MEM_SEG_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7759 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7760 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7761
7762#define IEM_MC_FETCH_MEM_SEG_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7763 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7764 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7765
7766#define IEM_MC_FETCH_MEM_SEG_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7767 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7768 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7769
7770
7771/* 32-bit segmented: */
7772#define IEM_MC_FETCH_MEM_SEG_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7773 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7774 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7775
7776#define IEM_MC_FETCH_MEM_SEG_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7777 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7778 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7779
7780#define IEM_MC_FETCH_MEM_SEG_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7781 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7782 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7783
7784#define IEM_MC_FETCH_MEM_SEG_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7785 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7786 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7787
7788#define IEM_MC_FETCH_MEM_SEG_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7789 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7790 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7791
7792#define IEM_MC_FETCH_MEM_SEG_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7793 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7794 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, \
7795 a_offDisp)
7796
7797#define IEM_MC_FETCH_MEM_SEG_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7798 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7799 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7800
7801#define IEM_MC_FETCH_MEM_SEG_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7802 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7803 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7804
7805#define IEM_MC_FETCH_MEM_SEG_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7806 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7807 pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7808
7809AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7810#define IEM_MC_FETCH_MEM_SEG_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7811 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch>(\
7812 pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7813
7814
7815/* 64-bit segmented: */
7816#define IEM_MC_FETCH_MEM_SEG_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7817 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7818 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7819
7820AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7821#define IEM_MC_FETCH_MEM_SEG_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7822 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch>(\
7823 pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7824
7825
7826/* 8-bit flat: */
7827#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7828 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, true>(\
7829 pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7830
7831#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7832 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, true>(\
7833 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7834
7835#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7836 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7837 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7838
7839#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7840 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7841 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7842
7843#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7844 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, true>(\
7845 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7846
7847#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7848 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7849 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7850
7851#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7852 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7853 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7854
7855
7856/* 16-bit flat: */
7857#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7858 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7859 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7860
7861#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7862 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7863 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7864
7865#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7866 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7867 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7868
7869#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7870 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7871 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7872
7873#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7874 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7875 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7876
7877#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7878 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7879 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7880
7881/* 32-bit flat: */
7882#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7883 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7884 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7885
7886#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7887 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7888 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7889
7890#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7891 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7892 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7893
7894#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7895 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7896 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7897
7898#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7899 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7900 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7901
7902#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7903 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7904 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7905
7906#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7907 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7908 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7909
7910#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7911 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7912 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7913
7914#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7915 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7916 pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7917
7918#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7919 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7920 pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7921
7922
7923/* 64-bit flat: */
7924#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7925 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7926 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7927
7928#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7929 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7930 pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7931
7932
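/*
 * A hedged note on the second template argument (a_fAlignMaskAndCtl), inferred from the usage
 * below: the low bits carry the natural alignment mask (size - 1), and the aligned SSE/AVX
 * variants OR in control flags (IEM_MEMMAP_F_ALIGN_GP, IEM_MEMMAP_F_ALIGN_SSE) which presumably
 * cause misaligned accesses to fault in accordance with the SSE/AVX alignment rules.
 */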
7933/* 128-bit segmented: */
7934#define IEM_MC_FETCH_MEM_SEG_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7935 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7936 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7937
7938#define IEM_MC_FETCH_MEM_SEG_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7939 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7940 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7941 kIemNativeEmitMemOp_Fetch>(\
7942 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7943
7944AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7945#define IEM_MC_FETCH_MEM_SEG_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7946 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7947 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7948 kIemNativeEmitMemOp_Fetch>(\
7949 pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7950
7951#define IEM_MC_FETCH_MEM_SEG_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7952 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7953 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7954
7955#define IEM_MC_FETCH_MEM_SEG_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7956 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
7957 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7958
7959
7960/* 128-bit flat: */
7961#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7962 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7963 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7964
7965#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7966 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7967 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7968 kIemNativeEmitMemOp_Fetch, true>(\
7969 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7970
7971#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7972 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7973 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7974 kIemNativeEmitMemOp_Fetch, true>(\
7975 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7976
7977#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7978 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7979 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7980
7981#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7982 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7983 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7984
7985/* 256-bit segmented: */
7986#define IEM_MC_FETCH_MEM_SEG_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7987 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7988 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7989
7990#define IEM_MC_FETCH_MEM_SEG_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7991 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7992 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7993
7994#define IEM_MC_FETCH_MEM_SEG_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7995 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
7996 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
7997 kIemNativeEmitMemOp_Fetch>(\
7998 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7999
8000#define IEM_MC_FETCH_MEM_SEG_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8001 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8002 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8003
8004
8005/* 256-bit flat: */
8006#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8007 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8008 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8009
8010#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8011 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8012 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8013
8014#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8015 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8016 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8017 kIemNativeEmitMemOp_Fetch, true>(\
8018 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8019
8020#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8021 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8022 pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8023
8024
8025
8026/*********************************************************************************************************************************
8027* Memory stores (IEM_MEM_STORE_XXX). *
8028*********************************************************************************************************************************/
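
/*
 * For illustration (hypothetical variable names): the flat store wrappers pass UINT8_MAX as the
 * segment index and set the a_fFlat template argument, e.g.
 *      IEM_MC_STORE_MEM_FLAT_U32(GCPtrEff, u32Value);
 * becomes, per the definition below,
 *      off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1,
 *                                                 kIemNativeEmitMemOp_Store, true>(
 *                pReNative, off, u32Value, UINT8_MAX, GCPtrEff,
 *                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr);
 */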
8029
8030#define IEM_MC_STORE_MEM_SEG_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8031 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store>(\
8032 pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8033
8034#define IEM_MC_STORE_MEM_SEG_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8035 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>(\
8036 pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8037
8038#define IEM_MC_STORE_MEM_SEG_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8039 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store>(\
8040 pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8041
8042#define IEM_MC_STORE_MEM_SEG_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8043 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store>(\
8044 pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8045
8046
8047#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8048 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, true>(\
8049 pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8050
8051#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8052 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8053 pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8054
8055#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8056 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8057 pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8058
8059#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8060 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8061 pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8062
8063
8064#define IEM_MC_STORE_MEM_SEG_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8065 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t)>(\
8066 pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8067
8068#define IEM_MC_STORE_MEM_SEG_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8069 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(\
8070 pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8071
8072#define IEM_MC_STORE_MEM_SEG_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8073 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t)>(\
8074 pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8075
8076#define IEM_MC_STORE_MEM_SEG_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8077 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t)>(\
8078 pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8079
8080
8081#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8082 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t), true>(\
8083 pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8084
8085#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8086 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t), true>(\
8087 pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8088
8089#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8090 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t), true>(\
8091 pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8092
8093#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8094 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t), true>(\
8095 pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8096
8097/** Emits code for IEM_MC_STORE_MEM_SEG_U8/16/32/64_CONST and
8098 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8099template<uint8_t const a_cbMem, bool a_fFlat = false>
8100DECL_INLINE_THROW(uint32_t)
8101iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8102 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
8103{
8104 /*
8105 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8106 * to do the grunt work.
8107 */
8108 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, a_cbMem, uValueConst);
8109 off = iemNativeEmitMemFetchStoreDataCommon<a_cbMem, a_cbMem - 1,
8110 kIemNativeEmitMemOp_Store,
8111 a_fFlat>(pReNative, off, idxVarConstValue, iSegReg,
8112 idxVarGCPtrMem, pfnFunction, idxInstr);
8113 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8114 return off;
8115}
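
/*
 * Hedged usage sketch (hypothetical arguments):
 *      IEM_MC_STORE_MEM_SEG_U16_CONST(X86_SREG_ES, GCPtrEff, 0xffff);
 * goes through iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)> above, which wraps the
 * constant in a temporary immediate variable and then defers to
 *      iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>
 * before freeing that temporary again.
 */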
8116
8117
8118#define IEM_MC_STORE_MEM_SEG_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8119 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8120 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8121 kIemNativeEmitMemOp_Store>(\
8122 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8123
8124#define IEM_MC_STORE_MEM_SEG_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8125 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store>(\
8126 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8127
8128#define IEM_MC_STORE_MEM_SEG_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8129 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store>(\
8130 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8131
8132#define IEM_MC_STORE_MEM_SEG_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8133 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8134 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8135 kIemNativeEmitMemOp_Store>(\
8136 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8137
8138
8139#define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8140 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8141 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8142 kIemNativeEmitMemOp_Store, true>(\
8143 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, \
8144 pCallEntry->idxInstr)
8145
8146#define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8147 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, true>(\
8148 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8149
8150#define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8151 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, true>(\
8152 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8153
8154#define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8155 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8156 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8157 true>(\
8158 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8159
8160
8161
8162/*********************************************************************************************************************************
8163* Stack Accesses. *
8164*********************************************************************************************************************************/
8165#define IEM_MC_PUSH_U16(a_u16Value) \
8166 off = iemNativeEmitStackPush<16, 0, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8167#define IEM_MC_PUSH_U32(a_u32Value) \
8168 off = iemNativeEmitStackPush<32, 0, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8169#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8170 off = iemNativeEmitStackPush<32, 0, 1>(pReNative, off, a_uSegVal, (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8171#define IEM_MC_PUSH_U64(a_u64Value) \
8172 off = iemNativeEmitStackPush<64, 0, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8173
8174#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8175 off = iemNativeEmitStackPush<16, 32, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8176#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8177 off = iemNativeEmitStackPush<32, 32, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8178#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8179 off = iemNativeEmitStackPush<32, 32, 1>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8180
8181#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8182 off = iemNativeEmitStackPush<16, 64, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8183#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8184 off = iemNativeEmitStackPush<64, 64, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8185
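
/*
 * For orientation (assumed reading of the template parameters, see the emitter below): the three
 * arguments are <operand size in bits, flat stack bits (0 = segmented via SS), fIsSegReg>.  Thus
 * IEM_MC_PUSH_U16 instantiates iemNativeEmitStackPush<16, 0, 0> for a segmented 16-bit push,
 * IEM_MC_FLAT64_PUSH_U64 instantiates iemNativeEmitStackPush<64, 64, 0> for a push onto a flat
 * 64-bit stack, and IEM_MC_PUSH_U32_SREG sets the segment-register flag.
 */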
8186
8187/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8188template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat, bool a_fIsSegReg = false>
8189DECL_INLINE_THROW(uint32_t)
8190iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uintptr_t pfnFunction, uint8_t idxInstr)
8191{
8192 /*
8193 * Assert sanity.
8194 */
8195 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
8196 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
8197 AssertCompile(!a_fIsSegReg || a_cBitsVar < 64);
8198 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8199 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8200#ifdef VBOX_STRICT
8201 uint32_t const cTmplArgs = RT_MAKE_U32_FROM_U8(a_cBitsVar, a_cBitsFlat, a_fIsSegReg, 0);
8202 if (a_cBitsFlat != 0)
8203 {
8204 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8205 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8206 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8207 Assert( pfnFunction
8208 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8209 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8210 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8211 : cTmplArgs == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8212 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8213 : UINT64_C(0xc000b000a0009000) ));
8214 }
8215 else
8216 Assert( pfnFunction
8217 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8218 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8219 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8220 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8221 : UINT64_C(0xc000b000a0009000) ));
8222#endif
8223
8224#ifdef VBOX_STRICT
8225 /*
8226 * Check that the fExec flags we've got make sense.
8227 */
8228 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8229#endif
8230
8231 /*
8232 * To keep things simple we have to commit any pending writes first as we
8233 * may end up making calls.
8234 */
8235 /** @todo we could postpone this till we make the call and reload the
8236 * registers after returning from the call. Not sure if that's sensible or
8237 * not, though. */
8238 off = iemNativeRegFlushPendingWrites(pReNative, off);
8239
8240 /*
8241 * First we calculate the new RSP and the effective stack pointer value.
8242 * For 64-bit mode and flat 32-bit these two are the same.
8243 * (Code structure is very similar to that of PUSH)
8244 */
8245 RT_CONSTEXPR
8246 uint8_t const cbMem = a_cBitsVar / 8;
8247 bool const fIsIntelSeg = a_fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8248 uint8_t const cbMemAccess = !a_fIsSegReg || !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8249 ? cbMem : sizeof(uint16_t);
8250 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8251 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8252 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8253 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8254 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8255 {
8256 Assert(idxRegEffSp == idxRegRsp);
8257 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8258 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8259 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8260 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8261 else
8262 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8263 }
8264 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8265 {
8266 Assert(idxRegEffSp != idxRegRsp);
8267 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8268 kIemNativeGstRegUse_ReadOnly);
8269#ifdef RT_ARCH_AMD64
8270 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8271#else
8272 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8273#endif
8274 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8275 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8276 offFixupJumpToUseOtherBitSp = off;
8277 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8278 {
8279 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8280 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8281 }
8282 else
8283 {
8284 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8285 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8286 }
8287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8288 }
8289 /* SpUpdateEnd: */
8290 uint32_t const offLabelSpUpdateEnd = off;
8291
8292 /*
8293 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8294 * we're skipping lookup).
8295 */
8296 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8297 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8298 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8299 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8300 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8301 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8302 : UINT32_MAX;
8303 uint8_t const idxRegValue = !TlbState.fSkip
8304 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8305 ? iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarValue, &off,
8306 IEMNATIVE_CALL_ARG2_GREG)
8307 : UINT8_MAX;
8308 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8309
8310
8311 if (!TlbState.fSkip)
8312 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8313 else
8314 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8315
8316 /*
8317 * Use16BitSp:
8318 */
8319 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8320 {
8321#ifdef RT_ARCH_AMD64
8322 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8323#else
8324 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8325#endif
8326 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8327 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8328 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8329 else
8330 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8331 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8332 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8333 }
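
    /*
     * Illustrative only - rough shape of what the code above emits for the segmented
     * (a_cBitsFlat == 0) case:
     *
     *          test    ss.attr, X86DESCATTR_D
     *          jcc     Use16BitSp              ; condition depends on the current CPU mode
     *          <update RSP/EffSp using the default stack width for the mode>
     *      SpUpdateEnd:
     *          <TLB state setup>
     *          jmp     TlbLookup               ; or TlbMiss when the lookup is skipped
     *      Use16BitSp:                         ; handles the other width (16- or 32-bit SP)
     *          <update RSP/EffSp using the other stack width>
     *          jmp     SpUpdateEnd
     */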
8334
8335 /*
8336 * TlbMiss:
8337 *
8338 * Call helper to do the pushing.
8339 */
8340 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8341
8342#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8343 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8344#else
8345 RT_NOREF(idxInstr);
8346#endif
8347
8348 /* Save variables in volatile registers. */
8349 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8350 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8351 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8352 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8353 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8354
8355 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8356 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8357 {
8358 /* Swap them using ARG0 as temp register: */
8359 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8360 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8361 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8362 }
8363 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8364 {
8365 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8366 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8367 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8368
8369 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8370 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8371 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8372 }
8373 else
8374 {
8375 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8376 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8377
8378 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8379 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8380 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8381 }
8382
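    /*
     * Hedged sketch of the resulting call (argument mapping per the register setup above and
     * below, not an authoritative helper prototype):
     *      pfnFunction(pVCpu [ARG0], effective stack pointer/address [ARG1], value to push [ARG2])
     */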
8383#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8384 /* Do delayed EFLAGS calculations. */
8385 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8386 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8387#endif
8388
8389 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8390 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8391
8392 /* Done setting up parameters, make the call. */
8393 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8394
8395 /* Restore variables and guest shadow registers to volatile registers. */
8396 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8397 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8398
8399#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8400 if (!TlbState.fSkip)
8401 {
8402 /* end of TlbMiss - Jump to the done label. */
8403 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8404 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8405
8406 /*
8407 * TlbLookup:
8408 */
8409 if (!a_fIsSegReg || cbMemAccess == cbMem)
8410 {
8411 Assert(cbMemAccess == cbMem);
8412 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState,
8413 iSegReg, idxLabelTlbLookup,
8414 idxLabelTlbMiss, idxRegMemResult);
8415 }
8416 else
8417 {
8418 Assert(cbMemAccess == sizeof(uint16_t));
8419 off = iemNativeEmitTlbLookup<true, sizeof(uint16_t), sizeof(uint16_t) - 1,
8420 IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
8421 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8422 }
8423
8424 /*
8425 * Emit code to do the actual storing / fetching.
8426 */
8427 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8428# ifdef IEM_WITH_TLB_STATISTICS
8429 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8430 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8431# endif
8432 if (idxRegValue != UINT8_MAX)
8433 {
8434 switch (cbMemAccess)
8435 {
8436 case 2:
8437 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8438 break;
8439 case 4:
8440 if (!a_fIsSegReg || !fIsIntelSeg)
8441 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8442 else
8443 {
8444 /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
8445 PUSH FS in real mode, so we have to try to emulate that here.
8446 We borrow the now unused idxReg1 from the TLB lookup code here. */
8447 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8448 kIemNativeGstReg_EFlags);
8449 if (idxRegEfl != UINT8_MAX)
8450 {
8451# ifdef RT_ARCH_AMD64
8452 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8453 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8454 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8455# else
8456 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8457 off, TlbState.idxReg1, idxRegEfl,
8458 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8459# endif
8460 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8461 }
8462 else
8463 {
8464 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, TlbState.idxReg1);
8465 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8466 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8467 }
8468 /* ASSUMES the upper half of idxRegValue is ZERO. */
8469 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8470 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8471 }
8472 break;
8473 case 8:
8474 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8475 break;
8476 default:
8477 AssertFailed();
8478 }
8479 }
8480 else
8481 {
8482 switch (cbMemAccess)
8483 {
8484 case 2:
8485 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8486 idxRegMemResult, TlbState.idxReg1);
8487 break;
8488 case 4:
8489 Assert(!a_fIsSegReg);
8490 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8491 idxRegMemResult, TlbState.idxReg1);
8492 break;
8493 case 8:
8494 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8495 break;
8496 default:
8497 AssertFailed();
8498 }
8499 }
8500
8501 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8502 TlbState.freeRegsAndReleaseVars(pReNative);
8503
8504 /*
8505 * TlbDone:
8506 *
8507 * Commit the new RSP value.
8508 */
8509 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8510 }
8511#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8512
8513#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8514 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8515#endif
8516 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8517 if (idxRegEffSp != idxRegRsp)
8518 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8519
8520 /* The value variable is implicitly flushed. */
8521 if (idxRegValue != UINT8_MAX)
8522 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8523 iemNativeVarFreeLocal(pReNative, idxVarValue);
8524
8525 return off;
8526}
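
/* A rough C sketch of the dword the TlbLookup path above produces for the
   intel real mode segment push quirk (the sketch function and the uSel/fEfl
   parameter names are illustrative only, not part of the recompiler): */
#if 0
static uint32_t iemSketchIntelRealModePushSRegValue(uint16_t uSel, uint32_t fEfl)
{
    /* Lower word: the segment selector being pushed (idxRegValue, upper half zero).
       Upper word: EFLAGS[31:16] with the reserved-as-zero bits masked off. */
    return (fEfl & UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK) | uSel;
}
#endif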
8527
8528
8529
8530#define IEM_MC_POP_GREG_U16(a_iGReg) \
8531 off = iemNativeEmitStackPopGReg<16, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8532#define IEM_MC_POP_GREG_U32(a_iGReg) \
8533 off = iemNativeEmitStackPopGReg<32, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8534#define IEM_MC_POP_GREG_U64(a_iGReg) \
8535 off = iemNativeEmitStackPopGReg<64, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8536
8537#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8538 off = iemNativeEmitStackPopGReg<16, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8539#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8540 off = iemNativeEmitStackPopGReg<32, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8541
8542#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8543 off = iemNativeEmitStackPopGReg<16, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8544#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8545 off = iemNativeEmitStackPopGReg<64, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8546
8547
8548DECL_FORCE_INLINE_THROW(uint32_t)
8549iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8550 uint8_t idxRegTmp)
8551{
8552 /* Use16BitSp: */
8553#ifdef RT_ARCH_AMD64
8554 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8555 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8556 RT_NOREF(idxRegTmp);
8557#else
8558 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8559 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8560 /* add tmp, regrsp, #cbMem */
8561 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8562 /* and tmp, tmp, #0xffff */
8563 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8564 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8565 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8566 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8567#endif
8568 return off;
8569}
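
/* The net effect of iemNativeEmitStackPopUse16Sp above, as a rough C sketch
   (the sketch function and its pointer parameters are illustrative only):
   the read address is the zero-extended 16-bit SP, and only bits 15:0 of
   RSP are advanced, with 16-bit wraparound. */
#if 0
static void iemSketchStackPopUse16Sp(uint64_t *puRsp, uint64_t *puEffSp, uint8_t cbMem)
{
    *puEffSp = *puRsp & UINT16_MAX;                          /* effective read address    */
    uint16_t const uNewSp = (uint16_t)(*puRsp + cbMem);      /* SP += cbMem, wraps at 64K */
    *puRsp   = (*puRsp & ~(uint64_t)UINT16_MAX) | uNewSp;    /* bits 63:16 are preserved  */
}
#endif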
8570
8571
8572DECL_FORCE_INLINE(uint32_t)
8573iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8574{
8575 /* Use32BitSp: */
8576 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8577 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8578 return off;
8579}
8580
8581
8582/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64
 * @tparam a_cBitsVar   Width of the value being popped: 16, 32 or 64.
 * @tparam a_cBitsFlat  0 for segmented stack addressing, otherwise 32 or 64
 *                      for the corresponding flat mode. */
8583template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
8584DECL_INLINE_THROW(uint32_t)
8585iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg, uintptr_t pfnFunction, uint8_t idxInstr)
8586{
8587 /*
8588 * Assert sanity.
8589 */
8590 Assert(idxGReg < 16);
8591#ifdef VBOX_STRICT
8592 if (a_cBitsFlat != 0)
8593 {
8594 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8595 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8596 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8597 Assert( pfnFunction
8598 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8599 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8600 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8601 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8602 : UINT64_C(0xc000b000a0009000) ));
8603 }
8604 else
8605 Assert( pfnFunction
8606 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8607 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8608 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8609 : UINT64_C(0xc000b000a0009000) ));
8610#endif
8611
8612#ifdef VBOX_STRICT
8613 /*
8614 * Check that the fExec flags we've got make sense.
8615 */
8616 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8617#endif
8618
8619 /*
8620 * To keep things simple we have to commit any pending writes first as we
8621 * may end up making calls.
8622 */
8623 off = iemNativeRegFlushPendingWrites(pReNative, off);
8624
8625 /*
8626 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
8627 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8628 * directly as the effective stack pointer.
8629 * (Code structure is very similar to that of PUSH)
8630 */
8631 uint8_t const cbMem = a_cBitsVar / 8;
8632 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8633 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8634 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8635 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8636 * will be the resulting register value. */
8637 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8638
8639 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8640 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8641 {
8642 Assert(idxRegEffSp == idxRegRsp);
8643 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8644 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8645 }
8646 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8647 {
8648 Assert(idxRegEffSp != idxRegRsp);
8649 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8650 kIemNativeGstRegUse_ReadOnly);
8651#ifdef RT_ARCH_AMD64
8652 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8653#else
8654 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8655#endif
8656 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8657 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8658 offFixupJumpToUseOtherBitSp = off;
8659 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8660 {
8661/** @todo can skip idxRegRsp updating when popping ESP. */
8662 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8663 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8664 }
8665 else
8666 {
8667 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8668 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8669 }
8670 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8671 }
8672 /* SpUpdateEnd: */
8673 uint32_t const offLabelSpUpdateEnd = off;
8674
8675 /*
8676 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8677 * we're skipping lookup).
8678 */
8679 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8680 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8681 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8682 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8683 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8684 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8685 : UINT32_MAX;
8686
8687 if (!TlbState.fSkip)
8688 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8689 else
8690 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8691
8692 /*
8693 * Use16BitSp / Use32BitSp (whichever the main path above did not handle):
8694 */
8695 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8696 {
8697#ifdef RT_ARCH_AMD64
8698 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8699#else
8700 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8701#endif
8702 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8703 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8704 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8705 else
8706 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8707 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8708 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8709 }
8710
8711 /*
8712 * TlbMiss:
8713 *
8714 * Call helper to do the fetching.
8715 */
8716 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8717
8718#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8719 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8720#else
8721 RT_NOREF(idxInstr);
8722#endif
8723
8724 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8725 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8726 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8727 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8728
8729
8730 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8731 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8733
8734#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8735 /* Do delayed EFLAGS calculations. */
8736 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8737#endif
8738
8739 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8741
8742 /* Done setting up parameters, make the call. */
8743 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8744
8745 /* Move the return register content to idxRegMemResult. */
8746 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8748
8749 /* Restore variables and guest shadow registers to volatile registers. */
8750 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8751 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8752
8753#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8754 if (!TlbState.fSkip)
8755 {
8756 /* end of TlbMiss - Jump to the done label. */
8757 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8758 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8759
8760 /*
8761 * TlbLookup:
8762 */
8763 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
8764 idxLabelTlbLookup, idxLabelTlbMiss,
8765 idxRegMemResult);
8766
8767 /*
8768 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8769 */
8770 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8771# ifdef IEM_WITH_TLB_STATISTICS
8772 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8773 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8774# endif
8775 switch (cbMem)
8776 {
8777 case 2:
8778 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8779 break;
8780 case 4:
8781 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8782 break;
8783 case 8:
8784 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8785 break;
8786 default:
8787 AssertFailed();
8788 }
8789
8790 TlbState.freeRegsAndReleaseVars(pReNative);
8791
8792 /*
8793 * TlbDone:
8794 *
8795 * Set the new RSP value (FLAT accesses need to calculate it first) and
8796 * commit the popped register value.
8797 */
8798 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8799 }
8800#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8801
8802 if (idxGReg != X86_GREG_xSP)
8803 {
8804 /* Set the register. */
8805 if (cbMem >= sizeof(uint32_t))
8806 {
8807#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8808 AssertMsg( pReNative->idxCurCall == 0
8809 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8810 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8811 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8812#endif
8813 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8814#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8815 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8816#endif
8817#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8818 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8819 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8820#endif
8821 }
8822 else
8823 {
8824 Assert(cbMem == sizeof(uint16_t));
8825 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8826 kIemNativeGstRegUse_ForUpdate);
8827 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8828#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8829 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8830#endif
8831 iemNativeRegFreeTmp(pReNative, idxRegDst);
8832 }
8833
8834 /* Complete RSP calculation for FLAT mode. */
8835 if (idxRegEffSp == idxRegRsp)
8836 {
8837 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8838 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8839 else
8840 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8841 }
8842 }
8843 else
8844 {
8845 /* We're popping RSP, ESP or SP. Only the 16-bit case needs a bit of extra work, of course. */
8846 if (cbMem == sizeof(uint64_t))
8847 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8848 else if (cbMem == sizeof(uint32_t))
8849 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8850 else
8851 {
8852 if (idxRegEffSp == idxRegRsp)
8853 {
8854 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8855 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8856 else
8857 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8858 }
8859 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8860 }
8861 }
8862
8863#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8864 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8865#endif
8866
8867 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8868 if (idxRegEffSp != idxRegRsp)
8869 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8870 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8871
8872 return off;
8873}
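
/* Rough C model of the xSP special case above: when popping into the stack
   pointer itself, the loaded value replaces RSP/ESP/SP and thereby overrides
   the increment done for the pop (for 16-bit operand size only bits 15:0 are
   replaced).  Sketch only; the function and parameter names are illustrative. */
#if 0
static uint64_t iemSketchPopIntoXSp(uint64_t uRspAfterPopInc, uint64_t uPoppedValue, uint8_t cbMem)
{
    if (cbMem == 8)
        return uPoppedValue;                    /* POP RSP */
    if (cbMem == 4)
        return (uint32_t)uPoppedValue;          /* POP ESP - zero extended like any 32-bit GPR write */
    return (uRspAfterPopInc & ~(uint64_t)UINT16_MAX) | (uint16_t)uPoppedValue; /* POP SP */
}
#endif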
8874
8875
8876
8877/*********************************************************************************************************************************
8878* Memory mapping (IEM_MC_MEM_SEG_MAP_XXX, IEM_MC_MEM_FLAT_MAP_XXX). *
8879*********************************************************************************************************************************/
8880
8881#define IEM_MC_MEM_SEG_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8882 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/>(\
8883 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8884
8885#define IEM_MC_MEM_SEG_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8886 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/>(\
8887 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8888
8889#define IEM_MC_MEM_SEG_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8890 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/>(\
8891 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8892
8893#define IEM_MC_MEM_SEG_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8894 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/>(\
8895 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8896
8897
8898#define IEM_MC_MEM_SEG_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8899 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8900 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8901
8902#define IEM_MC_MEM_SEG_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8903 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8904 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8905
8906#define IEM_MC_MEM_SEG_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8907 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8908 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8909
8910#define IEM_MC_MEM_SEG_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8911 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8912 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8913
8914#define IEM_MC_MEM_SEG_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8915 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8916 pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8917
8918
8919#define IEM_MC_MEM_SEG_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8920 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8921 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8922
8923#define IEM_MC_MEM_SEG_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8924 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8925 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8926
8927#define IEM_MC_MEM_SEG_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8928 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8929 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8930
8931#define IEM_MC_MEM_SEG_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8932 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8933 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8934
8935#define IEM_MC_MEM_SEG_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8936 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8937 pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8938
8939
8940#define IEM_MC_MEM_SEG_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8941 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8942 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8943
8944#define IEM_MC_MEM_SEG_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8945 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8946 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8947#define IEM_MC_MEM_SEG_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8948 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8949 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8950
8951#define IEM_MC_MEM_SEG_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8952 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8953 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8954
8955#define IEM_MC_MEM_SEG_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8956 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8957 pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8958
8959
8960#define IEM_MC_MEM_SEG_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8961 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8962 pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8963
8964#define IEM_MC_MEM_SEG_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8965 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
8966 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */>(\
8967 pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8968
8969
8970#define IEM_MC_MEM_SEG_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8971 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8972 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8973
8974#define IEM_MC_MEM_SEG_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8975 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8976 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8977
8978#define IEM_MC_MEM_SEG_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8979 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8980 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8981
8982#define IEM_MC_MEM_SEG_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8983 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8984 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8985
8986
8987
8988#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8989 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, true>(\
8990 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8991
8992#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8993 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, true>(\
8994 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8995
8996#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8997 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, true>(\
8998 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8999
9000#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9001 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, true>(\
9002 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9003
9004
9005#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9006 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9007 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9008
9009#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9010 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9011 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9012
9013#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9014 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9015 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9016
9017#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9018 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9019 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9020
9021#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9022 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9023 pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9024
9025
9026#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9027 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9028 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9029
9030#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9031 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9032 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9033
9034#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9035 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9036 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9037
9038#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9039 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9040 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9041
9042#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9043 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9044 pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9045
9046
9047#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9048 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9049 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9050
9051#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9052 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9053 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9054
9055#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9056 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9057 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9058
9059#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9060 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9061 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9062
9063#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9064 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9065 pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9066
9067
9068#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9069 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9070 pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9071
9072#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9073 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9074 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */, true>(\
9075 pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9076
9077
9078#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9079 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9080 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9081
9082#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9083 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9084 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9085
9086#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9087 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9088 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9089
9090#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9091 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9092 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9093
9094
9095template<uint8_t const a_cbMem, uint32_t const a_fAccess, uint32_t const a_fAlignMaskAndCtl, bool a_fFlat = false>
9096DECL_INLINE_THROW(uint32_t)
9097iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9098 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
9099{
9100 /*
9101 * Assert sanity.
9102 */
9103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9104 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9105 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9106 && pVarMem->cbVar == sizeof(void *),
9107 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9108
9109 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9111 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9112 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9113 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9114
9115 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9116 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9117 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9118 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9119 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9120
9121 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
9122
9123 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9124
9125#ifdef VBOX_STRICT
9126# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9127 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9128 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9129 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9130 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9131# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9132 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9133 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9134 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9135
9136 if RT_CONSTEXPR_IF(a_fFlat)
9137 {
9138 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9139 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9140 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9141 switch (a_cbMem)
9142 {
9143 case 1:
9144 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU8));
9145 Assert(!a_fAlignMaskAndCtl);
9146 break;
9147 case 2:
9148 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU16));
9149 Assert(a_fAlignMaskAndCtl < 2);
9150 break;
9151 case 4:
9152 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU32));
9153 Assert(a_fAlignMaskAndCtl < 4);
9154 break;
9155 case 8:
9156 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU64));
9157 Assert(a_fAlignMaskAndCtl < 8);
9158 break;
9159 case 10:
9160 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9161 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9162 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9163 Assert(a_fAlignMaskAndCtl < 8);
9164 break;
9165 case 16:
9166 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU128));
9167 Assert(a_fAlignMaskAndCtl < 16);
9168 break;
9169# if 0
9170 case 32:
9171 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU256));
9172 Assert(a_fAlignMaskAndCtl < 32);
9173 break;
9174 case 64:
9175 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU512));
9176 Assert(a_fAlignMaskAndCtl < 64);
9177 break;
9178# endif
9179 default: AssertFailed(); break;
9180 }
9181 }
9182 else
9183 {
9184 Assert(iSegReg < 6);
9185 switch (a_cbMem)
9186 {
9187 case 1:
9188 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU8));
9189 Assert(!a_fAlignMaskAndCtl);
9190 break;
9191 case 2:
9192 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU16));
9193 Assert(a_fAlignMaskAndCtl < 2);
9194 break;
9195 case 4:
9196 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU32));
9197 Assert(a_fAlignMaskAndCtl < 4);
9198 break;
9199 case 8:
9200 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU64));
9201 Assert(a_fAlignMaskAndCtl < 8);
9202 break;
9203 case 10:
9204 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9205 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9206 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9207 Assert(a_fAlignMaskAndCtl < 8);
9208 break;
9209 case 16:
9210 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU128));
9211 Assert(a_fAlignMaskAndCtl < 16);
9212 break;
9213# if 0
9214 case 32:
9215 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU256));
9216 Assert(a_fAlignMaskAndCtl < 32);
9217 break;
9218 case 64:
9219 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU512));
9220 Assert(a_fAlignMaskAndCtl < 64);
9221 break;
9222# endif
9223 default: AssertFailed(); break;
9224 }
9225 }
9226# undef IEM_MAP_HLP_FN
9227# undef IEM_MAP_HLP_FN_NO_AT
9228#endif
9229
9230#ifdef VBOX_STRICT
9231 /*
9232 * Check that the fExec flags we've got make sense.
9233 */
9234 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9235#endif
9236
9237 /*
9238 * To keep things simple we have to commit any pending writes first as we
9239 * may end up making calls.
9240 */
9241 off = iemNativeRegFlushPendingWrites(pReNative, off);
9242
9243#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9244 /*
9245 * Move/spill/flush stuff out of call-volatile registers.
9246 * This is the easy way out. We could contain this to the tlb-miss branch
9247 * by saving and restoring active stuff here.
9248 */
9249 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9250 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9251#endif
9252
9253 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9254 while the tlb-miss codepath will temporarily put it on the stack.
9255 Set the type to stack here so we don't need to do it twice below. */
9256 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9257 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9258 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9259 * lookup is done. */
9260
9261 /*
9262 * Define labels and allocate the result register (trying for the return
9263 * register if we can).
9264 */
9265 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9266 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9267 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9268 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9269 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem);
9270 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9271 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9272 : UINT32_MAX;
9273
9274 /*
9275 * Jump to the TLB lookup code.
9276 */
9277 if (!TlbState.fSkip)
9278 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9279
9280 /*
9281 * TlbMiss:
9282 *
9283 * Call helper to do the fetching.
9284 * We flush all guest register shadow copies here.
9285 */
9286 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9287
9288#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9289 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9290#else
9291 RT_NOREF(idxInstr);
9292#endif
9293
9294#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9295 /* Save variables in volatile registers. */
9296 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9297 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9298#endif
9299
9300 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9301 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9302#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9303 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9304#else
9305 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9306#endif
9307
9308 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9309 if RT_CONSTEXPR_IF(!a_fFlat)
9310 {
9311 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9312 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9313 }
9314
9315#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9316 /* Do delayed EFLAGS calculations. */
9317 if RT_CONSTEXPR_IF(a_fFlat)
9318 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9319 fHstRegsNotToSave);
9320 else
9321 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9322 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9323 fHstRegsNotToSave);
9324#endif
9325
9326 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9327 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9328 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9329
9330 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9331 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9332
9333 /* Done setting up parameters, make the call. */
9334 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9335
9336 /*
9337 * Put the output in the right registers.
9338 */
9339 Assert(idxRegMemResult == pVarMem->idxReg);
9340 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9341 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9342
9343#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9344 /* Restore variables and guest shadow registers to volatile registers. */
9345 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9346 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9347#endif
9348
9349 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9350 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9351
9352#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9353 if (!TlbState.fSkip)
9354 {
9355 /* end of TlbMiss - Jump to the done label. */
9356 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9357 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9358
9359 /*
9360 * TlbLookup:
9361 */
9362 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl, a_fAccess>(pReNative, off, &TlbState, iSegReg,
9363 idxLabelTlbLookup, idxLabelTlbMiss,
9364 idxRegMemResult);
9365# ifdef IEM_WITH_TLB_STATISTICS
9366 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9367 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9368# endif
9369
9370 /* [idxVarUnmapInfo] = 0; */
9371 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9372
9373 /*
9374 * TlbDone:
9375 */
9376 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9377
9378 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9379
9380# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9381 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9382 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9383# endif
9384 }
9385#else
9386 RT_NOREF(idxLabelTlbMiss);
9387#endif
9388
9389 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9390 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9391
9392 return off;
9393}
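
/* Schematic of how a MAP is paired with a COMMIT_AND_UNMAP by the MC blocks
   using these emitters (abbreviated and illustrative only; the exact IEM_MC_*
   scaffolding and the iEffSeg/GCPtrEffDst names vary per instruction): */
#if 0
    IEM_MC_LOCAL(uint8_t,  bUnmapInfo);
    IEM_MC_ARG(uint32_t *, pu32Dst, 1);
    IEM_MC_MEM_SEG_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst); /* TLB lookup or mapping helper call */
    /* ... modify *pu32Dst ... */
    IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);                           /* commit + unmap, see below */
#endif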
9394
9395
9396#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9397 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, \
9398 pCallEntry->idxInstr, IEM_ACCESS_DATA_ATOMIC)
9399
9400#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9401 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, \
9402 pCallEntry->idxInstr, IEM_ACCESS_DATA_RW)
9403
9404#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9405 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, \
9406 pCallEntry->idxInstr, IEM_ACCESS_DATA_W)
9407
9408#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9409 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, \
9410 pCallEntry->idxInstr, IEM_ACCESS_DATA_R)
9411
9412DECL_INLINE_THROW(uint32_t)
9413iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9414 uintptr_t pfnFunction, uint8_t idxInstr, uint32_t fAccess)
9415{
9416 /*
9417 * Assert sanity.
9418 */
9419 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9420#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9421 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9422#endif
9423 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9424 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9425 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9426#ifdef VBOX_STRICT
9427 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9428 {
9429 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9430 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9431 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9432 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9433 case IEM_ACCESS_TYPE_WRITE:
9434 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9435 case IEM_ACCESS_TYPE_READ:
9436 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9437 default: AssertFailed();
9438 }
9439#else
9440 RT_NOREF(fAccess);
9441#endif
9442
9443 /*
9444 * To keep things simple we have to commit any pending writes first as we
9445 * may end up making calls (there shouldn't be any at this point, so this
9446 * is just for consistency).
9447 */
9448 /** @todo we could postpone this till we make the call and reload the
9449 * registers after returning from the call. Not sure if that's sensible or
9450 * not, though. */
9451 off = iemNativeRegFlushPendingWrites(pReNative, off);
9452
9453 /*
9454 * Move/spill/flush stuff out of call-volatile registers.
9455 *
9456 * We exclude any register holding the bUnmapInfo variable, as we'll be
9457 * checking it after returning from the call and will free it afterwards.
9458 */
9459 /** @todo save+restore active registers and maybe guest shadows in miss
9460 * scenario. */
9461 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9462 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9463
9464 /*
9465 * If the value in idxVarUnmapInfo is zero, we can skip all this. Otherwise
9466 * we'll have to call the unmap helper function.
9467 *
9468 * The likelihood of it being zero is higher than for the TLB hit when doing
9469 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9470 * access should also end up with a mapping that won't need special unmapping.
9471 */
9472 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9473 * should speed up things for the pure interpreter as well when TLBs
9474 * are enabled. */
9475#ifdef RT_ARCH_AMD64
9476 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9477 {
9478 /* test byte [rbp - xxx], 0ffh */
9479 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9480 pbCodeBuf[off++] = 0xf6;
9481 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9482 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9483 pbCodeBuf[off++] = 0xff;
9484 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9485 }
9486 else
9487#endif
9488 {
9489 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarUnmapInfo, &off,
9490 IEMNATIVE_CALL_ARG1_GREG);
9491 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9492 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9493 }
9494 uint32_t const offJmpFixup = off;
9495 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9496
9497 /*
9498 * Call the unmap helper function.
9499 */
9500#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9501 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9502#else
9503 RT_NOREF(idxInstr);
9504#endif
9505
9506 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9507 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9508 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9509
9510 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9511 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9512
9513 /* Done setting up parameters, make the call.
9514 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9515 calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9516 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9517
9518 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9519 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9520
9521 /*
9522 * Done, just fixup the jump for the non-call case.
9523 */
9524 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9525
9526 return off;
9527}
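
/* What iemNativeEmitMemCommitAndUnmap boils down to, as a rough C sketch:
   the helper is only called when the mapping actually needs an explicit
   commit/unmap, i.e. when bUnmapInfo is non-zero (the TLB-hit path above
   stores zero there).  Illustrative only; pfnCommitAndUnmap stands for one
   of the iemNativeHlpMemCommitAndUnmap* helpers passed in via pfnFunction. */
#if 0
    if (bUnmapInfo != 0)
        pfnCommitAndUnmap(pVCpu, bUnmapInfo);
#endif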
9528
9529
9530
9531/*********************************************************************************************************************************
9532* State and Exceptions *
9533*********************************************************************************************************************************/
9534
9535#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9536#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9537
9538#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9539#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9540#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9541
9542#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9543#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9544#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9545
9546
9547DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9548{
9549#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9550 RT_NOREF(pReNative, fForChange);
9551#else
9552 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9553 && fForChange)
9554 {
9555# ifdef RT_ARCH_AMD64
9556
9557 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9558 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9559 {
9560 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9561
9562 /* stmxcsr */
9563 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9564 pbCodeBuf[off++] = X86_OP_REX_B;
9565 pbCodeBuf[off++] = 0x0f;
9566 pbCodeBuf[off++] = 0xae;
9567 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9568 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9569 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9570 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9571 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9572 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9573
9574 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9575 }
9576
9577 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9578 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9579 kIemNativeGstRegUse_ReadOnly);
9580
9581 /*
9582 * Mask any exceptions, clear the exception status and load the result into the host MXCSR,
9583 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9584 * a register source/target (sigh).
9585 */
9586 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9587 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9588 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9589 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9590
9591 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9592
9593 /* ldmxcsr */
9594 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9595 pbCodeBuf[off++] = X86_OP_REX_B;
9596 pbCodeBuf[off++] = 0x0f;
9597 pbCodeBuf[off++] = 0xae;
9598 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9599 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9600 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9601 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9602 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9603 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9604
9605 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9606 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9607
9608# elif defined(RT_ARCH_ARM64)
9609 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9610
9611 /* Need to save the host floating point control register the first time, clear FPSR. */
9612 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9613 {
9614 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9615 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9616 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9617 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9618 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9619 }
9620
9621 /*
9622 * Translate MXCSR to FPCR.
9623 *
9624 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9625 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9626 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9627 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9628 */
9629 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9630 * and implement alternate handling if FEAT_AFP is present. */
9631 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9632 kIemNativeGstRegUse_ReadOnly);
9633
9634 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9635
9636 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9637 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9638
9639 /* If either MXCSR.FZ or MXCSR.DAZ is set, FPCR.FZ will be set. */
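/* Extract MXCSR.DAZ into TMP0, shift MXCSR.FZ down to bit 0 of idxRegTmp, OR the two together and shift the result up to the FPCR.FZ bit position. */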
9640 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9641 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9642 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9643 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9644
9645 /*
9646 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9647 *
9648 * Value MXCSR FPCR
9649 * 0 RN RN
9650 * 1 R- R+
9651 * 2 R+ R-
9652 * 3 RZ RZ
9653 *
9654 * Conversion can be achieved by swapping the two bit positions.
9655 */
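/* Example: a guest MXCSR.RC of 01b (round down) must end up as FPCR.RMode = 10b (round towards minus infinity), i.e. the two bits simply swap places. */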
9656 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9657 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 23, 1); /* MXCSR.RC bit 0 -> FPCR.RMode bit 1 */
9658 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9659 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 22, 1); /* MXCSR.RC bit 1 -> FPCR.RMode bit 0 */
9660
9661 /* Write the value to FPCR. */
9662 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9663
9664 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9665 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9666 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9667# else
9668# error "Port me"
9669# endif
9670 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9671 }
9672#endif
9673 return off;
9674}
9675
9676
9677
9678/*********************************************************************************************************************************
9679* Emitters for FPU related operations. *
9680*********************************************************************************************************************************/
9681
9682#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9683 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9684
9685/** Emits code for IEM_MC_FETCH_FCW. */
9686DECL_INLINE_THROW(uint32_t)
9687iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9688{
9689 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9690 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9691
9692 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9693
9694 /* Allocate a temporary FCW register. */
9695 /** @todo eliminate extra register */
9696 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9697 kIemNativeGstRegUse_ReadOnly);
9698
9699 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9700
9701 /* Free but don't flush the FCW register. */
9702 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9703 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9704
9705 return off;
9706}
9707
9708
9709#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9710 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9711
9712/** Emits code for IEM_MC_FETCH_FSW. */
9713DECL_INLINE_THROW(uint32_t)
9714iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9715{
9716 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9717 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9718
9719 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9720 /* Allocate a temporary FSW register. */
9721 /** @todo eliminate extra register */
9722 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9723 kIemNativeGstRegUse_ReadOnly);
9724
9725 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9726
9727 /* Free but don't flush the FSW register. */
9728 iemNativeRegFreeTmp(pReNative, idxFswReg);
9729 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9730
9731 return off;
9732}
9733
9734
9735
9736/*********************************************************************************************************************************
9737* Emitters for SSE/AVX specific operations. *
9738*********************************************************************************************************************************/
9739
9740#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9741 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9742
9743/** Emits code for IEM_MC_COPY_XREG_U128. */
9744DECL_INLINE_THROW(uint32_t)
9745iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9746{
9747 /* This is a nop if the source and destination registers are the same. */
9748 if (iXRegDst != iXRegSrc)
9749 {
9750 /* Allocate destination and source register. */
9751 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9752 kIemNativeGstSimdRegLdStSz_Low128,
9753 kIemNativeGstRegUse_ForFullWrite);
9754 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9755 kIemNativeGstSimdRegLdStSz_Low128,
9756 kIemNativeGstRegUse_ReadOnly);
9757
9758 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9759
9760 /* Free but don't flush the source and destination register. */
9761 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9762 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9763 }
9764
9765 return off;
9766}
9767
9768
9769#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9770 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9771
9772/** Emits code for IEM_MC_FETCH_XREG_U128. */
9773DECL_INLINE_THROW(uint32_t)
9774iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9775{
9776 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9777 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9778
9779 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9780 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9781
9782 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9783
9784 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9785
9786 /* Free but don't flush the source register. */
9787 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9788 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9789
9790 return off;
9791}
9792
9793
9794#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9795 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9796
9797#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9798 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9799
9800 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9801DECL_INLINE_THROW(uint32_t)
9802iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9803{
9804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9805 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9806
9807 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9808 kIemNativeGstSimdRegLdStSz_Low128,
9809 kIemNativeGstRegUse_ReadOnly);
9810
9811 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9812 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9813
9814 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9815
9816 /* Free but don't flush the source register. */
9817 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9818 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9819
9820 return off;
9821}
9822
9823
9824#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9825 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9826
9827#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9828 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9829
9830/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9831DECL_INLINE_THROW(uint32_t)
9832iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9833{
9834 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9835 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9836
9837 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9838 kIemNativeGstSimdRegLdStSz_Low128,
9839 kIemNativeGstRegUse_ReadOnly);
9840
9841 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9842 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9843
9844 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9845
9846 /* Free but don't flush the source register. */
9847 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9848 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9849
9850 return off;
9851}
9852
9853
9854#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9855 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9856
9857/** Emits code for IEM_MC_FETCH_XREG_U16. */
9858DECL_INLINE_THROW(uint32_t)
9859iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9860{
9861 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9862 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9863
9864 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9865 kIemNativeGstSimdRegLdStSz_Low128,
9866 kIemNativeGstRegUse_ReadOnly);
9867
9868 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9869 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9870
9871 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9872
9873 /* Free but don't flush the source register. */
9874 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9875 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9876
9877 return off;
9878}
9879
9880
9881#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9882 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9883
9884/** Emits code for IEM_MC_FETCH_XREG_U8. */
9885DECL_INLINE_THROW(uint32_t)
9886iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9887{
9888 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9889 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9890
9891 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9892 kIemNativeGstSimdRegLdStSz_Low128,
9893 kIemNativeGstRegUse_ReadOnly);
9894
9895 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9896 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9897
9898 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9899
9900 /* Free but don't flush the source register. */
9901 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9902 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9903
9904 return off;
9905}
9906
9907
9908#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9909 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9910
9911AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9912#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9913 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9914
9915
9916/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9917DECL_INLINE_THROW(uint32_t)
9918iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9919{
9920 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9921 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9922
9923 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9924 kIemNativeGstSimdRegLdStSz_Low128,
9925 kIemNativeGstRegUse_ForFullWrite);
9926 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9927
9928 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9929
9930 /* Free but don't flush the source register. */
9931 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9932 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9933
9934 return off;
9935}
9936
9937
9938#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9939 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9940
9941#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9942 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9943
9944#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9945 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9946
9947#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9948 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9949
9950#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9951 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9952
9953#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9954 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9955
9956 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and the R32/R64 variants. */
9957DECL_INLINE_THROW(uint32_t)
9958iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9959 uint8_t cbLocal, uint8_t iElem)
9960{
9961 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9962 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9963
9964#ifdef VBOX_STRICT
9965 switch (cbLocal)
9966 {
9967 case sizeof(uint64_t): Assert(iElem < 2); break;
9968 case sizeof(uint32_t): Assert(iElem < 4); break;
9969 case sizeof(uint16_t): Assert(iElem < 8); break;
9970 case sizeof(uint8_t): Assert(iElem < 16); break;
9971 default: AssertFailed();
9972 }
9973#endif
9974
9975 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9976 kIemNativeGstSimdRegLdStSz_Low128,
9977 kIemNativeGstRegUse_ForUpdate);
9978 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
9979
9980 switch (cbLocal)
9981 {
9982 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9983 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9984 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9985 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9986 default: AssertFailed();
9987 }
9988
9989 /* Free but don't flush the source register. */
9990 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9991 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9992
9993 return off;
9994}
9995
9996
9997#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9998 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9999
10000/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10001DECL_INLINE_THROW(uint32_t)
10002iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10003{
10004 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10005 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10006
10007 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10008 kIemNativeGstSimdRegLdStSz_Low128,
10009 kIemNativeGstRegUse_ForUpdate);
10010 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10011
10012 /* Zero the vector register first, then store the 64-bit value into the lower 64 bits. */
10013 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10014 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10015
10016 /* Free but don't flush the source register. */
10017 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10018 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10019
10020 return off;
10021}
10022
10023
10024#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10025 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10026
10027/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10028DECL_INLINE_THROW(uint32_t)
10029iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10030{
10031 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10032 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10033
10034 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10035 kIemNativeGstSimdRegLdStSz_Low128,
10036 kIemNativeGstRegUse_ForUpdate);
10037 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10038
10039 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10040 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10041 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10042
10043 /* Free but don't flush the source register. */
10044 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10045 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10046
10047 return off;
10048}
10049
10050
10051#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10052 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10053
10054/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10055DECL_INLINE_THROW(uint32_t)
10056iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10057 uint8_t idxSrcVar, uint8_t iDwSrc)
10058{
10059 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10060 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10061
10062 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10063 kIemNativeGstSimdRegLdStSz_Low128,
10064 kIemNativeGstRegUse_ForUpdate);
10065 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10066
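/* Copy the selected dword from the source variable into the destination element, taking a detour through IEMNATIVE_REG_FIXED_TMP0. */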
10067 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10068 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10069
10070 /* Free but don't flush the destination register. */
10071 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10072 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10073
10074 return off;
10075}
10076
10077
10078#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10079 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10080
10081/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10082DECL_INLINE_THROW(uint32_t)
10083iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10084{
10085 /*
10086 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10087 * if iYRegDst gets allocated first for the full write it won't load the
10088 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10089 * duplicated from the already allocated host register for iYRegDst containing
10090 * garbage. This will be caught by the guest register value checking in debug
10091 * builds.
10092 */
10093 if (iYRegDst != iYRegSrc)
10094 {
10095 /* Allocate destination and source register. */
10096 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10097 kIemNativeGstSimdRegLdStSz_256,
10098 kIemNativeGstRegUse_ForFullWrite);
10099 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10100 kIemNativeGstSimdRegLdStSz_Low128,
10101 kIemNativeGstRegUse_ReadOnly);
10102
10103 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10104 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10105
10106 /* Free but don't flush the source and destination register. */
10107 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10108 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10109 }
10110 else
10111 {
10112 /* This effectively only clears the upper 128-bits of the register. */
10113 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10114 kIemNativeGstSimdRegLdStSz_High128,
10115 kIemNativeGstRegUse_ForFullWrite);
10116
10117 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10118
10119 /* Free but don't flush the destination register. */
10120 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10121 }
10122
10123 return off;
10124}
10125
10126
10127#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10128 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10129
10130/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10131DECL_INLINE_THROW(uint32_t)
10132iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10133{
10134 /*
10135 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10136 * if iYRegDst gets allocated first for the full write it won't load the
10137 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10138 * duplicated from the already allocated host register for iYRegDst containing
10139 * garbage. This will be caught by the guest register value checking in debug
10140 * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper 256 bits
10141 * of a ZMM register, which we don't support yet, so it is just a nop.
10142 */
10143 if (iYRegDst != iYRegSrc)
10144 {
10145 /* Allocate destination and source register. */
10146 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10147 kIemNativeGstSimdRegLdStSz_256,
10148 kIemNativeGstRegUse_ReadOnly);
10149 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10150 kIemNativeGstSimdRegLdStSz_256,
10151 kIemNativeGstRegUse_ForFullWrite);
10152
10153 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10154
10155 /* Free but don't flush the source and destination register. */
10156 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10157 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10158 }
10159
10160 return off;
10161}
10162
10163
10164#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10165 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10166
10167/** Emits code for IEM_MC_FETCH_YREG_U128. */
10168DECL_INLINE_THROW(uint32_t)
10169iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10170{
10171 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10172 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10173
10174 Assert(iDQWord <= 1);
10175 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10176 iDQWord == 1
10177 ? kIemNativeGstSimdRegLdStSz_High128
10178 : kIemNativeGstSimdRegLdStSz_Low128,
10179 kIemNativeGstRegUse_ReadOnly);
10180
10181 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10182 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10183
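/* When the high dqword is requested it is copied down into the low half of the variable register. */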
10184 if (iDQWord == 1)
10185 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10186 else
10187 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10188
10189 /* Free but don't flush the source register. */
10190 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10191 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10192
10193 return off;
10194}
10195
10196
10197#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10198 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10199
10200/** Emits code for IEM_MC_FETCH_YREG_U64. */
10201DECL_INLINE_THROW(uint32_t)
10202iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10203{
10204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10205 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10206
10207 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10208 iQWord >= 2
10209 ? kIemNativeGstSimdRegLdStSz_High128
10210 : kIemNativeGstSimdRegLdStSz_Low128,
10211 kIemNativeGstRegUse_ReadOnly);
10212
10213 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10214 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10215
10216 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10217
10218 /* Free but don't flush the source register. */
10219 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10220 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10221
10222 return off;
10223}
10224
10225
10226#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10227 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10228
10229/** Emits code for IEM_MC_FETCH_YREG_U32. */
10230DECL_INLINE_THROW(uint32_t)
10231iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10232{
10233 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10234 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10235
10236 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10237 iDWord >= 4
10238 ? kIemNativeGstSimdRegLdStSz_High128
10239 : kIemNativeGstSimdRegLdStSz_Low128,
10240 kIemNativeGstRegUse_ReadOnly);
10241
10242 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10243 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10244
10245 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10246
10247 /* Free but don't flush the source register. */
10248 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10249 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10250
10251 return off;
10252}
10253
10254
10255#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10256 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10257
10258/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10259DECL_INLINE_THROW(uint32_t)
10260iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10261{
10262 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10263 kIemNativeGstSimdRegLdStSz_High128,
10264 kIemNativeGstRegUse_ForFullWrite);
10265
10266 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10267
10268 /* Free but don't flush the register. */
10269 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10270
10271 return off;
10272}
10273
10274
10275#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10276 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10277
10278/** Emits code for IEM_MC_STORE_YREG_U128. */
10279DECL_INLINE_THROW(uint32_t)
10280iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10281{
10282 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10283 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10284
10285 Assert(iDQword <= 1);
10286 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10287 iDQword == 0
10288 ? kIemNativeGstSimdRegLdStSz_Low128
10289 : kIemNativeGstSimdRegLdStSz_High128,
10290 kIemNativeGstRegUse_ForFullWrite);
10291
10292 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10293
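/* The source variable is only 128 bits wide, so storing to the high dqword copies the variable's low half into the destination's high half. */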
10294 if (iDQword == 0)
10295 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10296 else
10297 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10298
10299 /* Free but don't flush the source register. */
10300 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10301 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10302
10303 return off;
10304}
10305
10306
10307#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10308 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10309
10310/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10311DECL_INLINE_THROW(uint32_t)
10312iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10313{
10314 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10315 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10316
10317 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10318 kIemNativeGstSimdRegLdStSz_256,
10319 kIemNativeGstRegUse_ForFullWrite);
10320
10321 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10322
10323 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10324 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10325
10326 /* Free but don't flush the source register. */
10327 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10328 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10329
10330 return off;
10331}
10332
10333
10334#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10335 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10336
10337/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10338DECL_INLINE_THROW(uint32_t)
10339iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10340{
10341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10342 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10343
10344 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10345 kIemNativeGstSimdRegLdStSz_256,
10346 kIemNativeGstRegUse_ForFullWrite);
10347
10348 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10349
10350 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10351 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10352
10353 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10354 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10355
10356 return off;
10357}
10358
10359
10360#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10361 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10362
10363/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10364DECL_INLINE_THROW(uint32_t)
10365iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10366{
10367 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10368 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10369
10370 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10371 kIemNativeGstSimdRegLdStSz_256,
10372 kIemNativeGstRegUse_ForFullWrite);
10373
10374 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10375
10376 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10377 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10378
10379 /* Free but don't flush the source register. */
10380 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10381 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10382
10383 return off;
10384}
10385
10386
10387#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10388 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10389
10390/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10391DECL_INLINE_THROW(uint32_t)
10392iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10393{
10394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10395 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10396
10397 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10398 kIemNativeGstSimdRegLdStSz_256,
10399 kIemNativeGstRegUse_ForFullWrite);
10400
10401 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10402
10403 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10404 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10405
10406 /* Free but don't flush the source register. */
10407 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10408 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10409
10410 return off;
10411}
10412
10413
10414#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10415 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10416
10417/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10418DECL_INLINE_THROW(uint32_t)
10419iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10420{
10421 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10422 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10423
10424 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10425 kIemNativeGstSimdRegLdStSz_256,
10426 kIemNativeGstRegUse_ForFullWrite);
10427
10428 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10429
10430 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10431 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10432
10433 /* Free but don't flush the source register. */
10434 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10435 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10436
10437 return off;
10438}
10439
10440
10441#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10442 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10443
10444/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10445DECL_INLINE_THROW(uint32_t)
10446iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10447{
10448 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10449 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10450
10451 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10452 kIemNativeGstSimdRegLdStSz_256,
10453 kIemNativeGstRegUse_ForFullWrite);
10454
10455 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10456
10457 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10458
10459 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10460 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10461
10462 return off;
10463}
10464
10465
10466#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10467 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10468
10469/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10470DECL_INLINE_THROW(uint32_t)
10471iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10472{
10473 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10474 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10475
10476 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10477 kIemNativeGstSimdRegLdStSz_256,
10478 kIemNativeGstRegUse_ForFullWrite);
10479
10480 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10481
10482 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10483
10484 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10485 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10486
10487 return off;
10488}
10489
10490
10491#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10492 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10493
10494/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10495DECL_INLINE_THROW(uint32_t)
10496iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10497{
10498 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10499 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10500
10501 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10502 kIemNativeGstSimdRegLdStSz_256,
10503 kIemNativeGstRegUse_ForFullWrite);
10504
10505 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10506
10507 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10508
10509 /* Free but don't flush the source register. */
10510 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10511 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10512
10513 return off;
10514}
10515
10516
10517#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10518 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10519
10520/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10521DECL_INLINE_THROW(uint32_t)
10522iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10523{
10524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10525 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10526
10527 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10528 kIemNativeGstSimdRegLdStSz_256,
10529 kIemNativeGstRegUse_ForFullWrite);
10530
10531 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10532
10533 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10534
10535 /* Free but don't flush the source register. */
10536 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10537 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10538
10539 return off;
10540}
10541
10542
10543#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10544 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10545
10546/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10547DECL_INLINE_THROW(uint32_t)
10548iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10549{
10550 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10551 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10552
10553 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10554 kIemNativeGstSimdRegLdStSz_256,
10555 kIemNativeGstRegUse_ForFullWrite);
10556
10557 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10558
10559 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10560
10561 /* Free but don't flush the source register. */
10562 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10563 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10564
10565 return off;
10566}
10567
10568
10569#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10570 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10571
10572/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10573DECL_INLINE_THROW(uint32_t)
10574iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10575{
10576 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10577 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10578
10579 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10580 kIemNativeGstSimdRegLdStSz_256,
10581 kIemNativeGstRegUse_ForFullWrite);
10582
10583 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10584
10585 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10586 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10587
10588 /* Free but don't flush the source register. */
10589 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10590 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10591
10592 return off;
10593}
10594
10595
10596#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10597 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10598
10599/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10600DECL_INLINE_THROW(uint32_t)
10601iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10602{
10603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10604 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10605
10606 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10607 kIemNativeGstSimdRegLdStSz_256,
10608 kIemNativeGstRegUse_ForFullWrite);
10609
10610 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10611
10612 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10613 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10614
10615 /* Free but don't flush the source register. */
10616 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10617 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10618
10619 return off;
10620}
10621
10622
10623#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10624 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10625
10626/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10627DECL_INLINE_THROW(uint32_t)
10628iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10629{
10630 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10631 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10632
10633 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10634 kIemNativeGstSimdRegLdStSz_256,
10635 kIemNativeGstRegUse_ForFullWrite);
10636 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10637 kIemNativeGstSimdRegLdStSz_Low128,
10638 kIemNativeGstRegUse_ReadOnly);
10639 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10640
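/* Resulting layout: qword 0 = u64Local, qword 1 = SrcHx[127:64], bits 255:128 zeroed. */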
10641 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10642 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10643 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10644
10645 /* Free but don't flush the source and destination registers. */
10646 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10647 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10648 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10649
10650 return off;
10651}
10652
10653
10654#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10655 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10656
10657/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10658DECL_INLINE_THROW(uint32_t)
10659iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10660{
10661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10663
10664 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10665 kIemNativeGstSimdRegLdStSz_256,
10666 kIemNativeGstRegUse_ForFullWrite);
10667 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10668 kIemNativeGstSimdRegLdStSz_Low128,
10669 kIemNativeGstRegUse_ReadOnly);
10670 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10671
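/* Resulting layout: qword 0 = SrcHx[63:0], qword 1 = u64Local, bits 255:128 zeroed. */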
10672 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10673 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10674 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10675
10676 /* Free but don't flush the source and destination registers. */
10677 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10678 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10679 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10680
10681 return off;
10682}
10683
10684
10685#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10686 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10687
10688
10689/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10690DECL_INLINE_THROW(uint32_t)
10691iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10692{
10693 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10694 kIemNativeGstSimdRegLdStSz_Low128,
10695 kIemNativeGstRegUse_ForUpdate);
10696
10697 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10698 if (bImm8Mask & RT_BIT(0))
10699 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10700 if (bImm8Mask & RT_BIT(1))
10701 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10702 if (bImm8Mask & RT_BIT(2))
10703 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10704 if (bImm8Mask & RT_BIT(3))
10705 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10706
10707 /* Free but don't flush the destination register. */
10708 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10709
10710 return off;
10711}
10712
10713
10714#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10715 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10716
10717#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10718 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10719
10720/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10721DECL_INLINE_THROW(uint32_t)
10722iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10723{
10724 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10725 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10726
10727 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10728 kIemNativeGstSimdRegLdStSz_256,
10729 kIemNativeGstRegUse_ReadOnly);
10730 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10731
10732 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10733
10734 /* Free but don't flush the source register. */
10735 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10736 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10737
10738 return off;
10739}
10740
10741
10742#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10743 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10744
10745#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10746 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10747
10748/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10749DECL_INLINE_THROW(uint32_t)
10750iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10751{
10752 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10753 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10754
10755 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10756 kIemNativeGstSimdRegLdStSz_256,
10757 kIemNativeGstRegUse_ForFullWrite);
10758 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10759
10760 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10761
10762 /* Free but don't flush the source register. */
10763 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10764 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10765
10766 return off;
10767}
10768
10769
10770#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10771 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10772
10773
10774/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10775DECL_INLINE_THROW(uint32_t)
10776iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10777 uint8_t idxSrcVar, uint8_t iDwSrc)
10778{
10779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10780 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10781
10782 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10783 iDwDst < 4
10784 ? kIemNativeGstSimdRegLdStSz_Low128
10785 : kIemNativeGstSimdRegLdStSz_High128,
10786 kIemNativeGstRegUse_ForUpdate);
10787 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10788 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10789
10790 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10791 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10792
10793 /* Free but don't flush the source register. */
10794 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10795 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10796 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10797
10798 return off;
10799}
10800
10801
10802#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10803 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10804
10805
10806/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10807DECL_INLINE_THROW(uint32_t)
10808iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10809 uint8_t idxSrcVar, uint8_t iQwSrc)
10810{
10811 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10812 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10813
10814 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10815 iQwDst < 2
10816 ? kIemNativeGstSimdRegLdStSz_Low128
10817 : kIemNativeGstSimdRegLdStSz_High128,
10818 kIemNativeGstRegUse_ForUpdate);
10819 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10820 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10821
10822 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10823 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10824
10825 /* Free but don't flush the source register. */
10826 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10827 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10828 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10829
10830 return off;
10831}


#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)


/** Emits code for IEM_MC_STORE_YREG_U64. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);

    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);

    /* Free but don't flush the destination register, and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}
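/* Note (re. IEM_MC_STORE_YREG_U64 above): unlike a MOVQ-style store this does not zero-extend; only the
 * addressed qword is overwritten and the remaining bits of the YMM register are preserved (hence
 * kIemNativeGstRegUse_ForUpdate and loading only the affected 128-bit half). */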


#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)

/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
{
    RT_NOREF(pReNative, iYReg);
    /** @todo Needs to be implemented when support for AVX-512 is added. */
    return off;
}
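/* Note (re. IEM_MC_CLEAR_ZREG_256_UP above): this is a no-op for now because the recompiler only models the
 * lower 256 bits (XMM/YMM) of the guest SIMD registers, so there is no ZMM upper half to clear until the
 * AVX-512 support mentioned in the @todo arrives. */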



/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_SSE_AIMPL_XXX                                                                                        *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
{
    /* Grab the MXCSR register; it must not be call volatile, or we'd end up freeing it when setting up the call below. */
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));

#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
#endif

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
                                  false /*fFlushPendingWrites*/);

    /*
     * Load the MXCSR register into the first argument and mask out the current exception flags.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);

    /*
     * The updated MXCSR is in the return register; update the exception status flags.
     *
     * The return register is marked allocated as a temporary because it is required for the
     * exception generation check below.
     */
    Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
    uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
    off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);

#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_MxCsr>(pReNative, off, idxRegMxCsr);
#endif

    /*
     * Make sure we don't have any outstanding guest register writes as we may
     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
     */
    off = iemNativeRegFlushPendingWrites(pReNative, off);

#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
#else
    RT_NOREF(idxInstr);
#endif

    /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
     *        want to assume the existence of this instruction at the moment. */
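    /* Illustration of the check below, assuming the usual MXCSR layout (exception flags in bits 0..5, the
       corresponding mask bits in bits 7..12, X86_MXCSR_XCPT_MASK_SHIFT == 7): if the helper returns MXCSR
       with ZE (bit 2) set while ZM (bit 9) is clear, the shifted mask has bit 2 clear, its inverse has
       bit 2 set, and ANDing that with the returned value leaves ZE standing -> unmasked exception -> take
       the RaiseSseAvxFpRelated TB exit.  With ZM set, the shifted mask clears ZE and no exit is taken. */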
    uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
    /* tmp &= X86_MXCSR_XCPT_MASK */
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
    /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
    /* tmp = ~tmp */
    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
    /* tmp &= mxcsr */
    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
    off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
                                                                                         X86_MXCSR_XCPT_FLAGS);

    iemNativeRegFreeTmp(pReNative, idxRegTmp2);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);

    return off;
}
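/* Rough shape of the code the worker above emits (a sketch, not the literal instruction sequence):
 *      arg0   = mxcsr & ~X86_MXCSR_XCPT_FLAGS;     -- pass MXCSR with the stale exception flags cleared
 *      ret    = pfnAImpl(arg0, <caller args>);     -- the helper returns the updated MXCSR
 *      mxcsr |= ret;                               -- accumulate the new exception flags
 *      if (ret & ~((ret & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT) & X86_MXCSR_XCPT_FLAGS)
 *          -> RaiseSseAvxFpRelated TB exit;        -- an unmasked exception is pending
 */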


#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
}


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
                           uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
}


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                        *
*********************************************************************************************************************************/

#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
                           uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
}



/*********************************************************************************************************************************
*   Include instruction emitters.                                                                                                 *
*********************************************************************************************************************************/
#include "VMMAll/target-x86/IEMAllN8veEmit-x86.h"
