VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veRecompFuncs-x86.h@ 108640

Last change on this file since 108640 was 108640, checked in by vboxsync, 6 weeks ago

VMM/VMMAll/target-x86/IEMAllN8veRecompFuncs-x86.h: clang build fixes, bugref:10391

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 540.7 KB
Line 
1/* $Id: IEMAllN8veRecompFuncs-x86.h 108640 2025-03-20 12:32:08Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits, x86 target.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#ifdef IN_RING0
38# define VBOX_VMM_TARGET_X86
39#endif
40#include <VBox/vmm/iem.h>
41#include <VBox/vmm/cpum.h>
42#include <VBox/vmm/dbgf.h>
43#include "IEMInternal.h"
44#include <VBox/vmm/vmcc.h>
45#include <VBox/log.h>
46#include <VBox/err.h>
47#include <VBox/dis.h>
48#include <VBox/param.h>
49#include <iprt/assert.h>
50#include <iprt/heap.h>
51#include <iprt/mem.h>
52#include <iprt/string.h>
53#if defined(RT_ARCH_AMD64)
54# include <iprt/x86.h>
55#elif defined(RT_ARCH_ARM64)
56# include <iprt/armv8.h>
57#endif
58
59#include "IEMInline.h"
60#include "IEMThreadedFunctions.h"
61#include "IEMN8veRecompiler.h"
62#include "IEMN8veRecompilerEmit.h"
63#include "IEMN8veRecompilerTlbLookup.h"
64#include "IEMNativeFunctions.h"
65#include "VMMAll/target-x86/IEMAllN8veEmit-x86.h"
66
67
68/*
69 * Narrow down configs here to avoid wasting time on unused configs.
70 * Note! Same checks in IEMAllThrdRecompiler.cpp.
71 */
72
73#ifndef IEM_WITH_CODE_TLB
74# error The code TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_DATA_TLB
78# error The data TLB must be enabled for the recompiler.
79#endif
80
81
82/*********************************************************************************************************************************
83* Code emitters for flushing pending guest register writes and sanity checks *
84*********************************************************************************************************************************/
85
86#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
87
88# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
89/**
90 * Updates IEMCPU::uPcUpdatingDebug.
91 */
92DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
93{
94# ifdef RT_ARCH_AMD64
95 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
96 {
97 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
98 if ((int32_t)offDisp == offDisp || cBits != 64)
99 {
100 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
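/* (0x81 /0 is ADD r/m,imm32 and 0x83 /0 the sign-extended imm8 form; REX.W
   selects the 64-bit operand size.) */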
101 if (cBits == 64)
102 pCodeBuf[off++] = X86_OP_REX_W;
103 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
104 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
105 if ((int8_t)offDisp == offDisp)
106 pCodeBuf[off++] = (int8_t)offDisp;
107 else
108 {
109 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
110 off += sizeof(int32_t);
111 }
112 }
113 else
114 {
115 /* mov tmp0, imm64 */
116 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
117
118 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
119 if (cBits == 64)
120 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
121 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
122 pCodeBuf[off++] = X86_OP_REX_R;
123 pCodeBuf[off++] = 0x01;
124 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
125 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
126 }
127 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
128 return off;
129 }
130# endif
131
132 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
133 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
134
135 if (pReNative->Core.fDebugPcInitialized)
136 {
137 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
138 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
139 }
140 else
141 {
142 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
143 pReNative->Core.fDebugPcInitialized = true;
144 off = iemNativeEmitLoadGprWithGstRegExT<kIemNativeGstReg_Pc>(pCodeBuf, off, idxTmpReg);
145 }
146
147 if (cBits == 64)
148 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
149 else
150 {
151 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
152 if (cBits == 16)
153 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
154 }
155
156 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
157 IEMNATIVE_REG_FIXED_TMP0);
158
159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
160 iemNativeRegFreeTmp(pReNative, idxTmpReg);
161 return off;
162}
163
164
165# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
166DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
167{
168 /* Compare the shadow with the context value, they should match. */
169 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
170 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
171 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
172 return off;
173}
174# endif
175
176#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
177
178/**
179 * Flushes delayed write of a specific guest register.
180 *
181 * This must be called prior to calling CImpl functions and any helpers that use
182 * the guest state (like raising exceptions) and such.
183 *
184 * This optimization has not yet been implemented. The first target would be
185 * RIP updates, since these are the most common ones.
186 */
187template<IEMNATIVEGSTREGREF a_enmClass>
188DECL_INLINE_THROW(uint32_t)
189iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
190{
191#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
192 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
193#endif
194
195#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
196# if 0 /** @todo r=aeichner EFLAGS writeback delay. */
197 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_EFlags)
198 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
199 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
200# else
201 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
202# endif
203
204 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_Gpr)
205 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
206 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
207#endif
208
209 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_XReg)
210 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
211 {
212 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
213 /* Flush the shadows as the register needs to be reloaded (there is no
214 guarantee right now, that the referenced register doesn't change). */
215 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
216
217 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
218 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
219 }
220
221 return off;
222}
223
224
225
226/*********************************************************************************************************************************
227* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
228*********************************************************************************************************************************/
229
230#undef IEM_MC_BEGIN /* unused */
231#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
232 { \
233 Assert(pReNative->Core.bmVars == 0); \
234 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
235 Assert(pReNative->Core.bmStack == 0); \
236 pReNative->fMc = (a_fMcFlags); \
237 pReNative->fCImpl = (a_fCImplFlags); \
238 pReNative->cArgsX = (a_cArgsIncludingHidden)
239
240/** We have to get to the end in recompilation mode, as otherwise we won't
241 * generate code for all the IEM_MC_IF_XXX branches. */
242#define IEM_MC_END() \
243 iemNativeVarFreeAll(pReNative); \
244 } return off
245
246
247
248/*********************************************************************************************************************************
249* Liveness Stubs *
250*********************************************************************************************************************************/
251
252#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
253#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
254#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
255
256#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
257#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
258#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
259
260#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
261#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
262#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
263
264#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
265#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
266#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
267
268
269/*********************************************************************************************************************************
270* Native Emitter Support. *
271*********************************************************************************************************************************/
272
273#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
274
275#define IEM_MC_NATIVE_ELSE() } else {
276
277#define IEM_MC_NATIVE_ENDIF() } ((void)0)
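/* Note: RT_ARCH_VAL is a compile-time constant, so the compiler can simply drop
   whichever IEM_MC_NATIVE_IF / IEM_MC_NATIVE_ELSE branch doesn't match the host. */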
278
279
280#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
281 off = a_fnEmitter(pReNative, off)
282
283#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
284 off = a_fnEmitter(pReNative, off, (a0))
285
286#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
287 off = a_fnEmitter(pReNative, off, (a0), (a1))
288
289#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
290 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
291
292#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
293 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
294
295#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
296 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
297
298#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
299 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
300
301#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
302 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
303
304#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
305 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
306
307#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
308 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
309
310
311#ifndef RT_ARCH_AMD64
312# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
313#else
314/** @note This is a naive approach that ASSUMES that the register isn't
315 * allocated, so it only works safely for the first allocation(s) in
316 * an MC block. */
317# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
318 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
319
320DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
321 uint32_t off, bool fAllocated);
322
323DECL_INLINE_THROW(uint32_t)
324iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
325{
326 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
327 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
328 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
329
330# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
331 /* Must flush the register if it holds pending writes. */
332 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
333 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
334 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
335# endif
336
337 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
338 return off;
339}
340
341#endif /* RT_ARCH_AMD64 */
342
343
344
345/*********************************************************************************************************************************
346* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
347*********************************************************************************************************************************/
348
349#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
350 pReNative->fMc = 0; \
351 pReNative->fCImpl = (a_fFlags); \
352 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
353 a_cbInstr) /** @todo not used ... */
354
355
356#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
357 pReNative->fMc = 0; \
358 pReNative->fCImpl = (a_fFlags); \
359 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
360
361DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
362 uint8_t idxInstr, uint64_t a_fGstShwFlush,
363 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
364{
365 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
366}
367
368
369#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
370 pReNative->fMc = 0; \
371 pReNative->fCImpl = (a_fFlags); \
372 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
373 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
374
375DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
376 uint8_t idxInstr, uint64_t a_fGstShwFlush,
377 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
378{
379 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
380}
381
382
383#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
384 pReNative->fMc = 0; \
385 pReNative->fCImpl = (a_fFlags); \
386 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
387 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
388
389DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
390 uint8_t idxInstr, uint64_t a_fGstShwFlush,
391 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
392 uint64_t uArg2)
393{
394 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
395}
396
397
398
399/*********************************************************************************************************************************
400* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_PC_AND_FINISH_XXX) *
401*********************************************************************************************************************************/
402
403/** Emits the flags check for IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC64_WITH_FLAGS
404 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
405DECL_INLINE_THROW(uint32_t)
406iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
407{
408 /*
409 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
410 * return with a special status code and make the execution loop deal with
411 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
412 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
413 * could continue w/o interruption, it probably will drop into the
414 * debugger, so it's not worth the effort of trying to service it here;
415 * we just lump it in with the handling of the others.
416 *
417 * To simplify the code and the register state management even more (wrt
418 * the immediate in the AND operation), we always update the flags and skip
419 * the extra conditional jump associated with the check.
420 */
421 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
422 <= UINT32_MAX);
423#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
424 AssertMsg( pReNative->idxCurCall == 0
425 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
426 IEMLIVENESSBIT_IDX_EFL_OTHER)),
427 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
428 IEMLIVENESSBIT_IDX_EFL_OTHER)));
429#endif
430
431 /*
432 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
433 * any pending register writes must be flushed.
434 */
435 off = iemNativeRegFlushPendingWrites(pReNative, off);
436
437 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
438 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
439 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
440 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
441 X86_EFL_TF
442 | CPUMCTX_DBG_HIT_DRX_MASK
443 | CPUMCTX_DBG_DBGF_MASK);
444 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
445 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
446
447 /* Free but don't flush the EFLAGS register. */
448 iemNativeRegFreeTmp(pReNative, idxEflReg);
449
450 return off;
451}
452
453
454/** Helper for iemNativeEmitFinishInstructionWithStatus. */
455DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
456{
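/* Walk the TB's opcode ranges to find the one containing this call entry's opcode
   offset, then translate that offset into a guest-physical address via the range's
   physical page. */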
457 unsigned const offOpcodes = pCallEntry->offOpcode;
458 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
459 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
460 {
461 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
462 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
463 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
464 }
465 AssertFailedReturn(NIL_RTGCPHYS);
466}
467
468
469/** The VINF_SUCCESS dummy. */
470template<int const a_rcNormal, bool const a_fIsJump>
471DECL_FORCE_INLINE_THROW(uint32_t)
472iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
473 int32_t const offJump)
474{
475 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
476 if (a_rcNormal != VINF_SUCCESS)
477 {
478#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
479 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
480#else
481 RT_NOREF_PV(pCallEntry);
482#endif
483
484 /* As this code returns from the TB, any pending register writes must be flushed. */
485 off = iemNativeRegFlushPendingWrites(pReNative, off);
486
487 /*
488 * If we're in a conditional, mark the current branch as exiting so we
489 * can disregard its state when we hit the IEM_MC_ENDIF.
490 */
491 iemNativeMarkCurCondBranchAsExiting(pReNative);
492
493 /*
494 * Use the lookup table for getting to the next TB quickly.
495 * Note! In this code path there can only be one entry at present.
496 */
497 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
498 PCIEMTB const pTbOrg = pReNative->pTbOrg;
499 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
500 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
501
502#if 0
503 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
504 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
506 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
507 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
508
509 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
510
511#else
512 /* Load the index as argument #1 for the helper call at the given label. */
513 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
514
515 /*
516 * Figure out the physical address of the current instruction and see
517 * whether the next instruction we're about to execute is in the same
518 * page so we can optimistically skip TLB loading.
519 *
520 * - This is safe for all cases in FLAT mode.
521 * - In segmented modes it is complicated, given that a negative
522 * jump may underflow EIP and a forward jump may overflow or run into
523 * CS.LIM, triggering a #GP. The only thing we can get away with
524 * now at compile time is forward jumps w/o CS.LIM checks, since the
525 * lack of CS.LIM checks means we're good for the entire physical page
526 * we're executing on and another 15 bytes before we run into CS.LIM.
527 */
528 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
529# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
530 || !(pTbOrg->fFlags & IEMTB_F_X86_CS_LIM_CHECKS)
531# endif
532 )
533 {
534 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
535 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
536 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
537 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
538
539 {
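/* We only get here when the next PC is on the same guest page and the current
   instruction doesn't itself cross a page boundary (the '0xfff: je -56h' case
   above), presumably because a cross-page instruction would invalidate the
   simple GCPhysPcNext arithmetic. */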
540 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
541 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
542
543 /* Load the key lookup flags into the 2nd argument for the helper call.
544 - This is safe wrt CS limit checking since we're only here for FLAT modes.
545 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
546 interrupt shadow.
547 - The NMI inhibiting is more questionable, though... */
548 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
549 * Should we copy it into fExec to simplify this? OTOH, it's just a
550 * couple of extra instructions if EFLAGS are already in a register. */
551 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
552 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
553
554 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
555 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
556 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
557 }
558 }
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
562#endif
563 }
564 return off;
565}
566
567
568#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
569 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
570 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
571
572#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
573 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
574 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577/** Same as iemRegAddToRip64AndFinishingNoFlags. */
578DECL_INLINE_THROW(uint32_t)
579iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
580{
581#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
582# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
583 if (!pReNative->Core.offPc)
584 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
585# endif
586
587 /* Allocate a temporary PC register. */
588 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
589
590 /* Perform the addition and store the result. */
591 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
592 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
593
594 /* Free but don't flush the PC register. */
595 iemNativeRegFreeTmp(pReNative, idxPcReg);
596#endif
597
598#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
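/* With delayed PC updating we only accumulate the instruction length in
   Core.offPc here; the actual guest RIP write is emitted later when the
   pending PC update is flushed. */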
599 pReNative->Core.offPc += cbInstr;
600 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
601# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
602 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
603 off = iemNativeEmitPcDebugCheck(pReNative, off);
604# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
605 off = iemNativePcAdjustCheck(pReNative, off);
606# endif
607 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
608#endif
609
610 return off;
611}
612
613
614#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
615 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
617
618#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
620 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623/** Same as iemRegAddToEip32AndFinishingNoFlags. */
624DECL_INLINE_THROW(uint32_t)
625iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
626{
627#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
628# ifdef IEMNATIVE_REG_FIXED_PC_DBG
629 if (!pReNative->Core.offPc)
630 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
631# endif
632
633 /* Allocate a temporary PC register. */
634 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
635
636 /* Perform the addition and store the result. */
637 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
638 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
639
640 /* Free but don't flush the PC register. */
641 iemNativeRegFreeTmp(pReNative, idxPcReg);
642#endif
643
644#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
645 pReNative->Core.offPc += cbInstr;
646 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
647# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
648 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
649 off = iemNativeEmitPcDebugCheck(pReNative, off);
650# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
651 off = iemNativePcAdjustCheck(pReNative, off);
652# endif
653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
654#endif
655
656 return off;
657}
658
659
660#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
661 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
662 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
663
664#define IEM_MC_ADVANCE_PC_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
665 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669/** Same as iemRegAddToIp16AndFinishingNoFlags. */
670DECL_INLINE_THROW(uint32_t)
671iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
672{
673#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
674# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
675 if (!pReNative->Core.offPc)
676 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
677# endif
678
679 /* Allocate a temporary PC register. */
680 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
681
682 /* Perform the addition and store the result. */
683 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
684 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
685 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
686
687 /* Free but don't flush the PC register. */
688 iemNativeRegFreeTmp(pReNative, idxPcReg);
689#endif
690
691#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
692 pReNative->Core.offPc += cbInstr;
693 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
694# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
695 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
696 off = iemNativeEmitPcDebugCheck(pReNative, off);
697# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
698 off = iemNativePcAdjustCheck(pReNative, off);
699# endif
700 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
701#endif
702
703 return off;
704}
705
706
707/*********************************************************************************************************************************
708* Common code for changing PC/RIP/EIP/IP. *
709*********************************************************************************************************************************/
710
711/**
712 * Emits code to check if the content of @a idxAddrReg is a canonical address,
713 * raising a \#GP(0) if it isn't.
714 *
715 * @returns New code buffer offset, UINT32_MAX on failure.
716 * @param pReNative The native recompile state.
717 * @param off The code buffer offset.
718 * @param idxAddrReg The host register with the address to check.
719 * @param idxInstr The current instruction.
720 */
721DECL_FORCE_INLINE_THROW(uint32_t)
722iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
723{
724 /*
725 * Make sure we don't have any outstanding guest register writes as we may
726 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
727 */
728 off = iemNativeRegFlushPendingWrites(pReNative, off);
729
730#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
731 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
732#else
733 RT_NOREF(idxInstr);
734#endif
735
736#ifdef RT_ARCH_AMD64
737 /*
738 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
739 * return raisexcpt();
740 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
741 */
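/* Worked example for the 48-bit canonical check above: for 0x00007fffffffffff the
   high dword is 0x00007fff, adding 0x8000 gives 0xffff and shifting right by 16
   yields 0 (canonical); for 0x0000800000000000 the high dword is 0x00008000,
   adding 0x8000 gives 0x10000 and the shift yields 1, so the #GP(0) path is taken.
   Canonical high-half addresses pass because the 32-bit addition wraps around
   (e.g. 0xffff8000 + 0x8000 -> 0). */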
742 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
743
744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
745 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
746 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
747 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
748 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
749
750 iemNativeRegFreeTmp(pReNative, iTmpReg);
751
752#elif defined(RT_ARCH_ARM64)
753 /*
754 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
755 * return raisexcpt();
756 * ----
757 * mov x1, 0x800000000000
758 * add x1, x0, x1
759 * cmp xzr, x1, lsr 48
760 * b.ne .Lraisexcpt
761 */
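/* Same idea as the AMD64 variant: adding 0x800000000000 moves every canonical
   address into the low 48 bits (the high half wraps around 2^64), so any bits
   left above bit 47 flag a non-canonical address. */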
762 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
763
764 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
765 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
766 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
767 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
768
769 iemNativeRegFreeTmp(pReNative, iTmpReg);
770
771#else
772# error "Port me"
773#endif
774 return off;
775}
776
777
778/**
779 * Emits code to check if the content of @a idxAddrReg is a canonical address,
780 * raising a \#GP(0) if it isn't.
781 *
782 * Caller makes sure everything is flushed, except maybe PC.
783 *
784 * @returns New code buffer offset, UINT32_MAX on failure.
785 * @param pReNative The native recompile state.
786 * @param off The code buffer offset.
787 * @param idxAddrReg The host register with the address to check.
788 * @param offDisp The relative displacement that has already been
789 * added to idxAddrReg and must be subtracted if
790 * raising a \#GP(0).
791 * @param idxInstr The current instruction.
792 */
793DECL_FORCE_INLINE_THROW(uint32_t)
794iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
795 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
796{
797#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
798 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
799#endif
800
801#ifdef RT_ARCH_AMD64
802 /*
803 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
804 * return raisexcpt();
805 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
806 */
807 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
808
809 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
810 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
811 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
812 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
813
814#elif defined(RT_ARCH_ARM64)
815 /*
816 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
817 * return raisexcpt();
818 * ----
819 * mov x1, 0x800000000000
820 * add x1, x0, x1
821 * cmp xzr, x1, lsr 48
822 * b.ne .Lraisexcpt
823 */
824 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
825
826 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
827 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
828 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
829#else
830# error "Port me"
831#endif
832
833 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
834 uint32_t const offFixup1 = off;
835 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
836
837 /* jump .Lnoexcept; Skip the #GP code. */
838 uint32_t const offFixup2 = off;
839 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
840
841 /* .Lraisexcpt: */
842 iemNativeFixupFixedJump(pReNative, offFixup1, off);
843#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
844 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
845#else
846 RT_NOREF(idxInstr);
847#endif
848
849 /* Undo the PC adjustment and store the old PC value. */
850 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
851 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxAddrReg);
852
853 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
854
855 /* .Lnoexcept: */
856 iemNativeFixupFixedJump(pReNative, offFixup2, off);
857
858 iemNativeRegFreeTmp(pReNative, iTmpReg);
859 return off;
860}
861
862
863/**
864 * Emits code to check if the content of @a idxAddrReg is a canonical address,
865 * raising a \#GP(0) if it isn't.
866 *
867 * Caller makes sure everything is flushed, except maybe PC.
868 *
869 * @returns New code buffer offset, UINT32_MAX on failure.
870 * @param pReNative The native recompile state.
871 * @param off The code buffer offset.
872 * @param idxAddrReg The host register with the address to check.
873 * @param idxOldPcReg Register holding the old PC that offPc is relative
874 * to if available, otherwise UINT8_MAX.
875 * @param idxInstr The current instruction.
876 */
877DECL_FORCE_INLINE_THROW(uint32_t)
878iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
879 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
880{
881#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
882 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
883#endif
884
885#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
886# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
887 if (!pReNative->Core.offPc)
888# endif
889 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
890#else
891 RT_NOREF(idxInstr);
892#endif
893
894#ifdef RT_ARCH_AMD64
895 /*
896 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
897 * return raisexcpt();
898 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
899 */
900 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
901
902 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
903 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
904 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
905 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
906
907#elif defined(RT_ARCH_ARM64)
908 /*
909 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
910 * return raisexcpt();
911 * ----
912 * mov x1, 0x800000000000
913 * add x1, x0, x1
914 * cmp xzr, x1, lsr 48
915 * b.ne .Lraisexcpt
916 */
917 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
918
919 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
920 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
921 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
922#else
923# error "Port me"
924#endif
925
926#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
927 if (pReNative->Core.offPc)
928 {
929 /** @todo On x86, it is said that conditional jumps forward are statically
930 * predicted as not taken, so this isn't a very good construct.
931 * Investigate whether it makes sense to invert it and add another
932 * jump. Also, find out wtf the static predictor does here on arm! */
933 uint32_t const offFixup = off;
934 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
935
936 /* .Lraisexcpt: */
937# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
938 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
939# endif
940 /* We need to update cpum.GstCtx.rip. */
941 if (idxOldPcReg == UINT8_MAX)
942 {
943 idxOldPcReg = iTmpReg;
944 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
945 }
946 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
947 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
948
949 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
950 iemNativeFixupFixedJump(pReNative, offFixup, off);
951 }
952 else
953#endif
954 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
955
956 iemNativeRegFreeTmp(pReNative, iTmpReg);
957
958 return off;
959}
960
961
962/**
963 * Emits code to check that the content of @a idxAddrReg is within the limit
964 * of CS, raising a \#GP(0) if it isn't.
965 *
966 * @returns New code buffer offset; throws VBox status code on error.
967 * @param pReNative The native recompile state.
968 * @param off The code buffer offset.
969 * @param idxAddrReg The host register (32-bit) with the address to
970 * check.
971 * @param idxInstr The current instruction.
972 */
973DECL_FORCE_INLINE_THROW(uint32_t)
974iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
975 uint8_t idxAddrReg, uint8_t idxInstr)
976{
977 /*
978 * Make sure we don't have any outstanding guest register writes as we may
979 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
980 */
981 off = iemNativeRegFlushPendingWrites(pReNative, off);
982
983#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
984 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
985#else
986 RT_NOREF(idxInstr);
987#endif
988
989 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
990 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
991 kIemNativeGstRegUse_ReadOnly);
992
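/* Unsigned compare against CS.LIM: 'above' means the address is outside the
   limit, so we exit the TB to raise #GP(0). */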
993 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
994 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
995
996 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
997 return off;
998}
999
1000
1001
1002
1003/**
1004 * Emits code to check that the content of @a idxAddrReg is within the limit
1005 * of CS, raising a \#GP(0) if it isn't.
1006 *
1007 * Caller makes sure everything is flushed, except maybe PC.
1008 *
1009 * @returns New code buffer offset; throws VBox status code on error.
1010 * @param pReNative The native recompile state.
1011 * @param off The code buffer offset.
1012 * @param idxAddrReg The host register (32-bit) with the address to
1013 * check.
1014 * @param idxOldPcReg Register holding the old PC that offPc is relative
1015 * to if available, otherwise UINT8_MAX.
1016 * @param idxInstr The current instruction.
1017 */
1018DECL_FORCE_INLINE_THROW(uint32_t)
1019iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1020 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1021{
1022#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1023 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1024#endif
1025
1026#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1027# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1028 if (!pReNative->Core.offPc)
1029# endif
1030 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1031#else
1032 RT_NOREF(idxInstr);
1033#endif
1034
1035 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1036 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1037 kIemNativeGstRegUse_ReadOnly);
1038
1039 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1041 if (pReNative->Core.offPc)
1042 {
1043 uint32_t const offFixup = off;
1044 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1045
1046 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1047 if (idxOldPcReg == UINT8_MAX)
1048 {
1049 idxOldPcReg = idxAddrReg;
1050 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1051 }
1052 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1053 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1054# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1055 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1056# endif
1057 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1058 iemNativeFixupFixedJump(pReNative, offFixup, off);
1059 }
1060 else
1061#endif
1062 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1063
1064 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1065 return off;
1066}
1067
1068
1069/*********************************************************************************************************************************
1070* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1071*********************************************************************************************************************************/
1072
1073#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1074 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1075 (a_enmEffOpSize), pCallEntry->idxInstr); \
1076 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1082 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1083
1084#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1085 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1086 IEMMODE_16BIT, pCallEntry->idxInstr); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1093 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1094
1095#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1096 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1097 IEMMODE_64BIT, pCallEntry->idxInstr); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1104 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1105
1106
1107#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1108 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1109 (a_enmEffOpSize), pCallEntry->idxInstr); \
1110 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1116 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1117
1118#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1119 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1120 IEMMODE_16BIT, pCallEntry->idxInstr); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1127 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1128
1129#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1130 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1131 IEMMODE_64BIT, pCallEntry->idxInstr); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1138 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1139
1140/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1141 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1142 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1143template<bool const a_fWithinPage>
1144DECL_INLINE_THROW(uint32_t)
1145iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1146 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1147{
1148 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1149#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1150 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1151 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1152 {
1153 /* No #GP checking required, just update offPc and get on with it. */
1154 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1155# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1156 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1157# endif
1158 }
1159 else
1160#endif
1161 {
1162 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1163 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1164 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1165
1166 /* Allocate a temporary PC register. */
1167 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1168 kIemNativeGstRegUse_ForUpdate);
1169
1170 /* Perform the addition. */
1171 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1172
1173 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1174 {
1175 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1176 We can skip this if the target is within the same page. */
1177 if (!a_fWithinPage)
1178 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1179 (int64_t)offDisp + cbInstr, idxInstr);
1180 }
1181 else
1182 {
1183 /* Just truncate the result to 16-bit IP. */
1184 Assert(enmEffOpSize == IEMMODE_16BIT);
1185 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1186 }
1187
1188#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1189# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1190 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1191 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1192# endif
1193 /* Since we've already got the new PC value in idxPcReg, we can just as
1194 well write it out and reset offPc to zero. Otherwise, we'd need to use
1195 a copy of the shadow PC, which would cost another move instruction here. */
1196# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1197 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1198 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1199 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
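/* cInstrsSkipped is the number of instructions whose PC update got folded into
   this write-back; it only feeds the statistics and debug info below. */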
1200 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1201 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1202 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1203# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1204 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1205 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1206# endif
1207# endif
1208 pReNative->Core.offPc = 0;
1209#endif
1210
1211 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1212
1213 /* Free but don't flush the PC register. */
1214 iemNativeRegFreeTmp(pReNative, idxPcReg);
1215 }
1216 return off;
1217}
1218
1219
1220#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1221 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1222 (a_enmEffOpSize), pCallEntry->idxInstr); \
1223 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1229 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1230
1231#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1232 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1233 IEMMODE_16BIT, pCallEntry->idxInstr); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1240 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1241
1242#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1243 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1244 IEMMODE_32BIT, pCallEntry->idxInstr); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1251 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1252
1253
1254#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1255 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1256 (a_enmEffOpSize), pCallEntry->idxInstr); \
1257 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1263 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1264
1265#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1266 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1267 IEMMODE_16BIT, pCallEntry->idxInstr); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1274 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1275
1276#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1277 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1278 IEMMODE_32BIT, pCallEntry->idxInstr); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1285 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1286
1287/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1288 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1289 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1290template<bool const a_fFlat>
1291DECL_INLINE_THROW(uint32_t)
1292iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1293 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1294{
1295 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1296#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1297 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1298#endif
1299
1300 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1301 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1302 {
1303 off = iemNativeRegFlushPendingWrites(pReNative, off);
1304#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1305 Assert(pReNative->Core.offPc == 0);
1306#endif
1307 }
1308
1309 /* Allocate a temporary PC register. */
1310 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1311
1312 /* Perform the addition. */
1313 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1318
1319 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1320 if (enmEffOpSize == IEMMODE_16BIT)
1321 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1322
1323 /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
1324 if (!a_fFlat)
1325 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1326
1327 /* Commit it. */
1328#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1329 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1330 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1331#endif
1332
1333 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1334#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1335 pReNative->Core.offPc = 0;
1336#endif
1337
1338 /* Free but don't flush the PC register. */
1339 iemNativeRegFreeTmp(pReNative, idxPcReg);
1340
1341 return off;
1342}
1343
1344
1345#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1346 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1347 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1348
1349#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1350 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1351 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1357
1358#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1359 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1360 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1366
1367#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1368 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1369 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1373DECL_INLINE_THROW(uint32_t)
1374iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1375 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1376{
1377 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1378 off = iemNativeRegFlushPendingWrites(pReNative, off);
1379
1380#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1381 Assert(pReNative->Core.offPc == 0);
1382 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1383#endif
1384
1385 /* Allocate a temporary PC register. */
1386 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1387
1388 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1389 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1390 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1391 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1392#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1393 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1394 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1395#endif
1396 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1397
1398 /* Free but don't flush the PC register. */
1399 iemNativeRegFreeTmp(pReNative, idxPcReg);
1400
1401 return off;
1402}
1403
1404
1405
1406/*********************************************************************************************************************************
1407* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_IND_JMP_UXX_AND_FINISH). *
1408*********************************************************************************************************************************/
1409
1410/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for pre-386 targets. */
1411#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1412 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1413
1414/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for 386+ targets. */
1415#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1416 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1417
1418/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for use in 64-bit code. */
1419#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1420 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1421
1422/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for pre-386 targets that checks and
1423 * clears flags. */
1424#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1425 IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1426 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1427
1428/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for 386+ targets that checks and
1429 * clears flags. */
1430#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1431 IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1432 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1433
1434/** Variant of IEM_MC_IND_JMP_U16_AND_FINISH for use in 64-bit code that checks and
1435 * clears flags. */
1436#define IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1437 IEM_MC_IND_JMP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1438 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1439
1440#undef IEM_MC_IND_JMP_U16_AND_FINISH
1441
1442
1443/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for 386+ targets. */
1444#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1445 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1446
1447/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for use in 64-bit code. */
1448#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1449 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1450
1451/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for 386+ targets that checks and
1452 * clears flags. */
1453#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1454 IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1455 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1456
1457/** Variant of IEM_MC_IND_JMP_U32_AND_FINISH for use in 64-bit code that checks
1458 * and clears flags. */
1459#define IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1460 IEM_MC_IND_JMP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1461 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1462
1463#undef IEM_MC_IND_JMP_U32_AND_FINISH
1464
1465
1466/** Variant of IEM_MC_IND_JMP_U64_AND_FINISH for use in 64-bit code. */
1467#define IEM_MC_IND_JMP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1468 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1469
1470/** Variant of IEM_MC_IND_JMP_U64_AND_FINISH for use in 64-bit code that checks
1471 * and clears flags. */
1472#define IEM_MC_IND_JMP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1473 IEM_MC_IND_JMP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1474 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1475
1476#undef IEM_MC_IND_JMP_U64_AND_FINISH
1477
1478
1479/** Same as iemRegRipJumpU16AndFinishNoFlags,
1480 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1481DECL_INLINE_THROW(uint32_t)
1482iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1483 uint8_t idxInstr, uint8_t cbVar)
1484{
1485 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1486 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1487
1488 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1489 the PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1490 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
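    /* A 64-bit target loaded from a 64-bit operand may be non-canonical, whereas 16/32-bit
       values are zero extended and thus always canonical; for 16/32-bit code the CS.LIM
       check is only needed when the segment isn't flat. */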
1491 uint8_t const idxOldPcReg = fMayRaiseGp0
1492 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1493 : UINT8_MAX;
1494 if (fMayRaiseGp0)
1495 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1496
1497 /* Get a register with the new PC loaded from idxVarPc.
1498 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1499 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1500
1501 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1502 if (fMayRaiseGp0)
1503 {
1504 if (f64Bit)
1505 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1506 else
1507 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1508 }
1509
1510 /* Store the result. */
1511 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1512
1513#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1514 pReNative->Core.offPc = 0;
1515 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1516# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1518 pReNative->Core.fDebugPcInitialized = true;
1519 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1520# endif
1521#endif
1522
1523 if (idxOldPcReg != UINT8_MAX)
1524 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1525 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1526 /** @todo implicitly free the variable? */
1527
1528 return off;
1529}
1530
1531
1532
1533/*********************************************************************************************************************************
1534* Emitters for changing PC/RIP/EIP/IP with an indirect or relative call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1535*********************************************************************************************************************************/
1536
1537/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1538 * them below the stack emitters, but then they would not be close to the rest of the PC/RIP handling...). */
1539DECL_FORCE_INLINE_THROW(uint32_t)
1540iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1541{
1542 /* Use16BitSp: */
1543#ifdef RT_ARCH_AMD64
1544 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1545 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1546#else
1547 /* sub regeff, regrsp, #cbMem */
1548 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1549 /* and regeff, regeff, #0xffff */
1550 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1551 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1552 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
1553 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into bits 15:0 of idxRegRsp, keeping the rest of idxRegRsp. */
1554#endif
1555 return off;
1556}
1557
1558
1559DECL_FORCE_INLINE(uint32_t)
1560iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1561{
1562 /* Use32BitSp: */
1563 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1564 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1565 return off;
1566}
1567
1568
1569template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
1570DECL_INLINE_THROW(uint32_t)
1571iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1572 uintptr_t pfnFunction, uint8_t idxInstr)
1573{
1574 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
1575 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
1576
1577 /*
1578 * Assert sanity.
1579 */
1580#ifdef VBOX_STRICT
1581 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1582 {
1583 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1584 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1585 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1586 Assert( pfnFunction
1587 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1588 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1589 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1590 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1591 : UINT64_C(0xc000b000a0009000) ));
1592 }
1593 else
1594 Assert( pfnFunction
1595 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1596 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1597 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1598 : UINT64_C(0xc000b000a0009000) ));
1599#endif
1600
1601#ifdef VBOX_STRICT
1602 /*
1603 * Check that the fExec flags we've got make sense.
1604 */
1605 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1606#endif
1607
1608 /*
1609 * To keep things simple we have to commit any pending writes first as we
1610 * may end up making calls.
1611 */
1612 /** @todo we could postpone this till we make the call and reload the
1613 * registers after returning from the call. Not sure if that's sensible or
1614 * not, though. */
1615 off = iemNativeRegFlushPendingWrites(pReNative, off);
1616
1617 /*
1618 * First we calculate the new RSP and the effective stack pointer value.
1619 * For 64-bit mode and flat 32-bit these two are the same.
1620 * (Code structure is very similar to that of PUSH)
1621 */
1622 RT_CONSTEXPR
1623 uint8_t const cbMem = a_cBitsVar / 8;
1624 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1625 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1626 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1627 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1628 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1629 {
1630 Assert(idxRegEffSp == idxRegRsp);
1631 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
1632 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1633 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
1634 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1635 else
1636 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1637 }
1638 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1639 {
1640 Assert(idxRegEffSp != idxRegRsp);
1641 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1642 kIemNativeGstRegUse_ReadOnly);
1643#ifdef RT_ARCH_AMD64
1644 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1645#else
1646 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1647#endif
1648 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1649 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
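        /* The D/B bit of the SS descriptor attributes selects a 32-bit (set) or 16-bit (clear) stack
           pointer.  The SP width matching the current CPU mode is emitted inline below as the expected
           case; the other width is handled out of line (see the Use16BitSp block further down). */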
1650 offFixupJumpToUseOtherBitSp = off;
1651 if ((pReNative->fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT)
1652 {
1653 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1654 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1655 }
1656 else
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1659 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1662 }
1663 /* SpUpdateEnd: */
1664 uint32_t const offLabelSpUpdateEnd = off;
1665
1666 /*
1667 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1668 * we're skipping lookup).
1669 */
1670 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
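    /* iSegReg == UINT8_MAX tells the TLB code that the effective address is already linear, i.e. no SS base or limit applies. */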
1671 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1672 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1673 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1674 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1675 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1676 : UINT32_MAX;
1677 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1678
1679
1680 if (!TlbState.fSkip)
1681 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1682 else
1683 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1684
1685 /*
1686 * Use16BitSp:
1687 */
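    /* Note: this out-of-line path emits whichever SP-width update was not emitted inline above, then jumps back to SpUpdateEnd. */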
1688 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
1689 {
1690#ifdef RT_ARCH_AMD64
1691 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1692#else
1693 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1694#endif
1695 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1696 if ((pReNative->fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT)
1697 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1698 else
1699 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1700 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1701 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1702 }
1703
1704 /*
1705 * TlbMiss:
1706 *
1707 * Call helper to do the pushing.
1708 */
1709 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1710
1711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1713#else
1714 RT_NOREF(idxInstr);
1715#endif
1716
1717 /* Save variables in volatile registers. */
1718 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1719 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1720 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1721 | (RT_BIT_32(idxRegPc));
1722 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1723
1724 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1725 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1726 {
1727 /* Swap them using ARG0 as temp register: */
1728 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1729 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1730 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1731 }
1732 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1733 {
1734 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1736
1737 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1738 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1739 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1740 }
1741 else
1742 {
1743 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745
1746 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1748 }
1749
1750#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1751 /* Do delayed EFLAGS calculations. */
1752 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1753 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1754#endif
1755
1756 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1757 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1758
1759 /* Done setting up parameters, make the call. */
1760 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1761
1762 /* Restore variables and guest shadow registers to volatile registers. */
1763 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1764 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1765
1766#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1767 if (!TlbState.fSkip)
1768 {
1769 /* end of TlbMiss - Jump to the done label. */
1770 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1771 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1772
1773 /*
1774 * TlbLookup:
1775 */
1776 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
1777 idxLabelTlbLookup, idxLabelTlbMiss,
1778 idxRegMemResult);
1779
1780 /*
1781 * Emit code to do the actual storing / fetching.
1782 */
1783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1784# ifdef IEM_WITH_TLB_STATISTICS
1785 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1786 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1787# endif
1788 AssertCompile(cbMem == 2 || cbMem == 4 || cbMem == 8);
1789 if RT_CONSTEXPR_IF(cbMem == 2)
1790 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1791 else if RT_CONSTEXPR_IF(cbMem == 4)
1792 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1793 else
1794 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1795
1796 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1797 TlbState.freeRegsAndReleaseVars(pReNative);
1798
1799 /*
1800 * TlbDone:
1801 *
1802 * Commit the new RSP value.
1803 */
1804 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1805 }
1806#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1807
1808#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1809 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
1810#endif
1811 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1812 if (idxRegEffSp != idxRegRsp)
1813 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1814
1815 return off;
1816}
1817
1818
1819/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1820#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1821 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1822
1823/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1824 * clears flags. */
1825#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1826 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1827 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1828
1829/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1830#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1831 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1832
1833/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1834 * clears flags. */
1835#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1836 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1837 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1838
1839#undef IEM_MC_IND_CALL_U16_AND_FINISH
1840
1841
1842/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1843#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1844 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1845
1846/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1847 * clears flags. */
1848#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1849 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1850 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1851
1852#undef IEM_MC_IND_CALL_U32_AND_FINISH
1853
1854
1855/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1856 * an extra parameter, for use in 64-bit code. */
1857#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1858 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1859
1860
1861/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1862 * an extra parameter, for use in 64-bit code and we need to check and clear
1863 * flags. */
1864#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1865 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1866 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1867
1868#undef IEM_MC_IND_CALL_U64_AND_FINISH
1869
1870/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1871 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1872DECL_INLINE_THROW(uint32_t)
1873iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1874 uint8_t idxInstr, uint8_t cbVar)
1875{
1876 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1877 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1878
1879 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1880 off = iemNativeRegFlushPendingWrites(pReNative, off);
1881
1882#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1883 Assert(pReNative->Core.offPc == 0);
1884 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1885#endif
1886
1887 /* Get a register with the new PC loaded from idxVarPc.
1888 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1889 uint8_t const idxPcRegNew = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1890
1891 /* Check limit (may #GP(0) + exit TB). */
1892 if (!f64Bit)
1893/** @todo we can skip this test in FLAT 32-bit mode. */
1894 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1895 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1896 else if (cbVar > sizeof(uint32_t))
1897 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1898
1899#if 1
1900 /* Allocate a temporary PC register, we don't want it shadowed. */
1901 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1902 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1903#else
1904 /* Allocate a temporary PC register. */
1905 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1906 true /*fNoVolatileRegs*/);
1907#endif
1908
1909 /* Perform the addition and push the variable to the guest stack. */
1910 /** @todo Flat variants for PC32 variants. */
1911 switch (cbVar)
1912 {
1913 case sizeof(uint16_t):
1914 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1915 /* Truncate the result to 16-bit IP. */
1916 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1917 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1918 break;
1919 case sizeof(uint32_t):
1920 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1921 /** @todo In FLAT mode we can use the flat variant. */
1922 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1923 break;
1924 case sizeof(uint64_t):
1925 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1926 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1927 break;
1928 default:
1929 AssertFailed();
1930 }
1931
1932 /* RSP got changed, so flush the pending writes again. */
1933 off = iemNativeRegFlushPendingWrites(pReNative, off);
1934
1935 /* Store the result. */
1936 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
1937#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1938 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1939 pReNative->Core.fDebugPcInitialized = true;
1940 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1941#endif
1942
1943#if 1
1944 /* Need to transfer the shadow information to the new RIP register. */
1945 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1946#else
1947 /* Sync the new PC. */
1948 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxPcRegNew);
1949#endif
1950 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1951 iemNativeRegFreeTmp(pReNative, idxPcReg);
1952 /** @todo implicitly free the variable? */
1953
1954 return off;
1955}
1956
1957
1958/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1959 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1960#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1961 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1962
1963/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1964 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1965 * flags. */
1966#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1967 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1968 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1969
1970/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1971 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1972#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1973 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1974
1975/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1976 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1977 * flags. */
1978#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1979 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1980 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1981
1982/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1983 * an extra parameter, for use in 64-bit code. */
1984#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1985 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1986
1987/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1988 * an extra parameter, for use in 64-bit code and we need to check and clear
1989 * flags. */
1990#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1991 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1992 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1993
1994#undef IEM_MC_REL_CALL_S16_AND_FINISH
1995
1996/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1997 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1998DECL_INLINE_THROW(uint32_t)
1999iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2000 uint8_t idxInstr)
2001{
2002 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2003 off = iemNativeRegFlushPendingWrites(pReNative, off);
2004
2005#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2006 Assert(pReNative->Core.offPc == 0);
2007 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2008#endif
2009
2010 /* Allocate a temporary PC register. */
2011 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2012 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2013 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2014
2015 /* Calculate the return address (IP + cbInstr) and from it the new IP. */
2016 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2017 /* Truncate the result to 16-bit IP. */
2018 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2019 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2020 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2021
2022 /* Truncate the result to 16-bit IP. */
2023 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2024
2025 /* Check limit (may #GP(0) + exit TB). */
2026 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2027
2028 /* Push the return address onto the guest stack. */
2029 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2030
2031 /* RSP got changed, so flush again. */
2032 off = iemNativeRegFlushPendingWrites(pReNative, off);
2033
2034 /* Store the result. */
2035 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2036#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2037 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2038 pReNative->Core.fDebugPcInitialized = true;
2039 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2040#endif
2041
2042 /* Need to transfer the shadow information to the new RIP register. */
2043 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2044 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2045 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2046
2047 return off;
2048}
2049
2050
2051/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2052 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2053#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2054 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2055
2056/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2057 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2058 * flags. */
2059#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2060 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2061 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2062
2063#undef IEM_MC_REL_CALL_S32_AND_FINISH
2064
2065/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2066 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2067DECL_INLINE_THROW(uint32_t)
2068iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2069 uint8_t idxInstr)
2070{
2071 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2072 off = iemNativeRegFlushPendingWrites(pReNative, off);
2073
2074#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2075 Assert(pReNative->Core.offPc == 0);
2076 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2077#endif
2078
2079 /* Allocate a temporary PC register. */
2080 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2081 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2082 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2083
2084 /* Update the EIP to get the return address. */
2085 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2086
2087 /* Load the address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's outside. */
2088 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2089 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2090 /** @todo we can skip this test in FLAT 32-bit mode. */
2091 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2092
2093 /* Push the return address onto the guest stack. */
2094 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2095 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2096
2097 /* RSP got changed, so flush the pending writes again. */
2098 off = iemNativeRegFlushPendingWrites(pReNative, off);
2099
2100 /* Store the result. */
2101 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2102#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2103 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2104 pReNative->Core.fDebugPcInitialized = true;
2105 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2106#endif
2107
2108 /* Need to transfer the shadow information to the new RIP register. */
2109 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2110 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2111 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2112
2113 return off;
2114}
2115
2116
2117/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2118 * an extra parameter, for use in 64-bit code. */
2119#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2120 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2121
2122/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2123 * an extra parameter, for use in 64-bit code and we need to check and clear
2124 * flags. */
2125#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2126 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2127 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2128
2129#undef IEM_MC_REL_CALL_S64_AND_FINISH
2130
2131/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2132 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2133DECL_INLINE_THROW(uint32_t)
2134iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2135 uint8_t idxInstr)
2136{
2137 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2138 off = iemNativeRegFlushPendingWrites(pReNative, off);
2139
2140#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2141 Assert(pReNative->Core.offPc == 0);
2142 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2143#endif
2144
2145 /* Allocate a temporary PC register. */
2146 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2147 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2148 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2149
2150 /* Update the RIP to get the return address. */
2151 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2152
2153 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2154 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2155 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2156 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2157
2158 /* Push the return address onto the guest stack. */
2159 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2160
2161 /* RSP got changed, so flush the pending writes again. */
2162 off = iemNativeRegFlushPendingWrites(pReNative, off);
2163
2164 /* Store the result. */
2165 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2166#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2167 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2168 pReNative->Core.fDebugPcInitialized = true;
2169 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2170#endif
2171
2172 /* Need to transfer the shadow information to the new RIP register. */
2173 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2174 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2175 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2176
2177 return off;
2178}
2179
2180
2181/*********************************************************************************************************************************
2182* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2183*********************************************************************************************************************************/
2184
2185DECL_FORCE_INLINE_THROW(uint32_t)
2186iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2187 uint16_t cbPopAdd, uint8_t idxRegTmp)
2188{
2189 /* Use16BitSp: */
2190#ifdef RT_ARCH_AMD64
2191 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2192 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2193 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2194 RT_NOREF(idxRegTmp);
2195
2196#elif defined(RT_ARCH_ARM64)
2197 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2198 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2199 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2200 uint16_t const cbCombined = cbMem + cbPopAdd;
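    /* A64 ADD (immediate) only encodes a 12-bit immediate (optionally shifted left by 12), so combined values of 4KiB or more need a second add for the upper bits. */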
2201 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2202 if (cbCombined >= RT_BIT_32(12))
2203 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2204 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2205 /* and tmp, tmp, #0xffff */
2206 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2207 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2208 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2209 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2210
2211#else
2212# error "Port me"
2213#endif
2214 return off;
2215}
2216
2217
2218DECL_FORCE_INLINE_THROW(uint32_t)
2219iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2220 uint16_t cbPopAdd)
2221{
2222 /* Use32BitSp: */
2223 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2224 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2225 return off;
2226}
2227
2228
2229/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2230#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr) \
2231 off = iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2232
2233/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2234#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2235 Assert((a_enmEffOpSize) == IEMMODE_32BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2236 off = (a_enmEffOpSize) == IEMMODE_32BIT \
2237 ? iemNativeEmitRetn<IEMMODE_32BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2238 : iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2239
2240/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2241#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2242 Assert((a_enmEffOpSize) == IEMMODE_64BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2243 off = (a_enmEffOpSize) == IEMMODE_64BIT \
2244 ? iemNativeEmitRetn<IEMMODE_64BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2245 : iemNativeEmitRetn<IEMMODE_16BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2246
2247/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2248 * clears flags. */
2249#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbPopArgs, a_cbInstr) \
2250 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr); \
2251 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2252
2253/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2254 * clears flags. */
2255#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2256 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2257 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2258
2259/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2260 * clears flags. */
2261#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2262 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2263 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2264
2265/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2266template<IEMMODE const a_enmEffOpSize, bool const a_f64Bit>
2267DECL_INLINE_THROW(uint32_t)
2268iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPopArgs, uint8_t idxInstr)
2269{
2270 RT_NOREF(cbInstr);
2271 AssertCompile(a_enmEffOpSize == IEMMODE_64BIT || a_enmEffOpSize == IEMMODE_32BIT || a_enmEffOpSize == IEMMODE_16BIT);
2272
2273#ifdef VBOX_STRICT
2274 /*
2275 * Check that the fExec flags we've got make sense.
2276 */
2277 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2278#endif
2279
2280 /*
2281 * To keep things simple we have to commit any pending writes first as we
2282 * may end up making calls.
2283 */
2284 off = iemNativeRegFlushPendingWrites(pReNative, off);
2285
2286 /*
2287 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
2288 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2289 * directly as the effective stack pointer.
2290 *
2291 * (Code structure is very similar to that of PUSH)
2292 *
2293 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2294 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2295 * aren't commonly used (or useful) and thus not in need of optimizing.
2296 *
2297 * Note! For non-flat modes the guest RSP is not allocated for update but
2298 *       rather for calculation, as the shadowed register would otherwise remain
2299 *       modified even when the return address raises #GP(0) for being outside
2300 *       the CS limit, leaving a wrong stack pointer value in the guest (see the
2301 *       near return testcase in bs3-cpu-basic-2). If no exception is raised,
2302 *       the shadowing is transferred to the new register returned by
2303 *       iemNativeRegAllocTmpForGuestReg() at the end.
2304 */
2305 RT_CONSTEXPR
2306 uint8_t const cbMem = a_enmEffOpSize == IEMMODE_64BIT
2307 ? sizeof(uint64_t)
2308 : a_enmEffOpSize == IEMMODE_32BIT
2309 ? sizeof(uint32_t)
2310 : sizeof(uint16_t);
2311/** @todo the basic flatness could be detected by the threaded compiler step
2312 * like for the other macros... worth it? */
2313 bool const fFlat = a_enmEffOpSize == IEMMODE_64BIT
2314 || (a_enmEffOpSize == IEMMODE_32BIT /* see note */ && IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
2315 uintptr_t const pfnFunction = a_enmEffOpSize == IEMMODE_64BIT
2316 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2317 : fFlat
2318 ? (uintptr_t)iemNativeHlpStackFlatFetchU32
2319 : a_enmEffOpSize == IEMMODE_32BIT
2320 ? (uintptr_t)iemNativeHlpStackFetchU32
2321 : (uintptr_t)iemNativeHlpStackFetchU16;
2322 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2323 fFlat ? kIemNativeGstRegUse_ForUpdate
2324 : kIemNativeGstRegUse_Calculation,
2325 true /*fNoVolatileRegs*/);
2326 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2327 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2328 * will be the resulting register value. */
2329 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2330
2331 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2332 if (fFlat)
2333 Assert(idxRegEffSp == idxRegRsp);
2334 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2335 {
2336 Assert(idxRegEffSp != idxRegRsp);
2337 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2338 kIemNativeGstRegUse_ReadOnly);
2339#ifdef RT_ARCH_AMD64
2340 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2341#else
2342 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2343#endif
2344 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2345 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2346 offFixupJumpToUseOtherBitSp = off;
2347 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_32BIT)
2348 {
2349 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2350 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2351 }
2352 else
2353 {
2354 Assert(a_enmEffOpSize == IEMMODE_16BIT);
2355 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2356 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2357 idxRegMemResult);
2358 }
2359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2360 }
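    /* Editor's note: the X86DESCATTR_D test above picks the stack pointer width
       (SP vs ESP) from SS.ATTR.D/B; the fixed-up jump lands in the 'Use16BitSp'
       section emitted below, which handles the other stack-width case. */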
2361 /* SpUpdateEnd: */
2362 uint32_t const offLabelSpUpdateEnd = off;
2363
2364 /*
2365 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2366 * we're skipping lookup).
2367 */
2368 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2369 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2370 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2371 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2372 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2373 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2374 : UINT32_MAX;
2375
2376 if (!TlbState.fSkip)
2377 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2378 else
2379 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2380
2381 /*
2382 * Use16BitSp:
2383 */
2384 if (!fFlat)
2385 {
2386#ifdef RT_ARCH_AMD64
2387 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2388#else
2389 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2390#endif
2391 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2392 if ((pReNative->fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT)
2393 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2394 idxRegMemResult);
2395 else
2396 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2397 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2399 }
2400
2401 /*
2402 * TlbMiss:
2403 *
2404 * Call helper to do the popping.
2405 */
2406 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2407
2408#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2409 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2410#else
2411 RT_NOREF(idxInstr);
2412#endif
2413
2414 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2415 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2416 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2417 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2418
2419
2420 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2421 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2422 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2423
2424#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2425 /* Do delayed EFLAGS calculations. */
2426 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2427#endif
2428
2429 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2430 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2431
2432 /* Done setting up parameters, make the call. */
2433 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2434
2435 /* Move the return register content to idxRegMemResult. */
2436 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2437 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2438
2439 /* Restore variables and guest shadow registers to volatile registers. */
2440 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2441 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2442
2443#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2444 if (!TlbState.fSkip)
2445 {
2446 /* end of TlbMiss - Jump to the done label. */
2447 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2448 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2449
2450 /*
2451 * TlbLookup:
2452 */
2453 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
2454 idxLabelTlbLookup, idxLabelTlbMiss,
2455 idxRegMemResult);
2456
2457 /*
2458 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2459 */
2460 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2461# ifdef IEM_WITH_TLB_STATISTICS
2462 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2463 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2464# endif
2465 switch (cbMem)
2466 {
2467 case 2:
2468 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2469 break;
2470 case 4:
2471 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2472 break;
2473 case 8:
2474 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2475 break;
2476 default:
2477 AssertFailed();
2478 }
2479
2480 TlbState.freeRegsAndReleaseVars(pReNative);
2481
2482 /*
2483 * TlbDone:
2484 *
2485 * Set the new RSP value (FLAT accesses need to calculate it first) and
2486 * commit the popped register value.
2487 */
2488 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2489 }
2490#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2491
2492 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2493 if RT_CONSTEXPR_IF(!a_f64Bit)
2494/** @todo we can skip this test in FLAT 32-bit mode. */
2495 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2496 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2497 else if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2498 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2499
2500 /* Complete RSP calculation for FLAT mode. */
2501 if (idxRegEffSp == idxRegRsp)
2502 {
2503 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2504 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPopArgs);
2505 else
2506 {
2507 Assert(a_enmEffOpSize == IEMMODE_32BIT);
2508 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPopArgs);
2509 }
2510 }
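    /* Editor's illustration: for a 64-bit 'ret 8' this adds 8 (return address)
       plus 8 (cbPopArgs) to RSP, i.e. 16 in total; a plain 'ret' has
       cbPopArgs == 0 and only drops the return address. */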
2511
2512 /* Commit the result and clear any current guest shadows for RIP. */
2513 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
2514 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>( pReNative, off, idxRegMemResult);
2515 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2516#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2518 pReNative->Core.fDebugPcInitialized = true;
2519 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2520#endif
2521
2522 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2523 if (!fFlat)
2524 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2525
2526 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2527 if (idxRegEffSp != idxRegRsp)
2528 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2529 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2530 return off;
2531}
2532
2533
2534/*********************************************************************************************************************************
2535* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2536*********************************************************************************************************************************/
2537
2538#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2539 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2540
2541/**
2542 * Emits code to check if a \#NM exception should be raised.
2543 *
2544 * @returns New code buffer offset, UINT32_MAX on failure.
2545 * @param pReNative The native recompile state.
2546 * @param off The code buffer offset.
2547 * @param idxInstr The current instruction.
2548 */
2549DECL_INLINE_THROW(uint32_t)
2550iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2551{
2552 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2553
2554 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2555 {
2556 /*
2557 * Make sure we don't have any outstanding guest register writes as we may
2558 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2559 */
2560 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2561 off = iemNativeRegFlushPendingWrites(pReNative, off);
2562
2563#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2564 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2565#else
2566 RT_NOREF(idxInstr);
2567#endif
2568
2569 /* Allocate a temporary CR0 register. */
2570 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2571 kIemNativeGstRegUse_ReadOnly);
2572
2573 /*
2574 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2575 * return raisexcpt();
2576 */
2577 /* Test and jump. */
2578 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2579 X86_CR0_EM | X86_CR0_TS);
2580
2581 /* Free but don't flush the CR0 register. */
2582 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2583
2584 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2585 }
2586 else
2587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2588
2589 return off;
2590}
2591
2592
2593#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2594 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2595
2596/**
2597 * Emits code to check if a \#NM exception should be raised for WAIT/FWAIT (CR0.MP and CR0.TS both set).
2598 *
2599 * @returns New code buffer offset, UINT32_MAX on failure.
2600 * @param pReNative The native recompile state.
2601 * @param off The code buffer offset.
2602 * @param idxInstr The current instruction.
2603 */
2604DECL_INLINE_THROW(uint32_t)
2605iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2606{
2607 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2608
2609 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2610 {
2611 /*
2612 * Make sure we don't have any outstanding guest register writes as we may
2613 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2614 */
2615 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2616 off = iemNativeRegFlushPendingWrites(pReNative, off);
2617
2618#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2619 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2620#else
2621 RT_NOREF(idxInstr);
2622#endif
2623
2624 /* Allocate a temporary CR0 register. */
2625 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2626 kIemNativeGstRegUse_Calculation);
2627
2628 /*
2629 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2630 * return raisexcpt();
2631 */
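        /* Editor's note: WAIT/FWAIT only raises #NM when CR0.MP and CR0.TS are both
           set, which is why this masks and compares against the combined value rather
           than testing for any of the bits like the plain device-not-available check. */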
2632 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2633 /* Test and jump. */
2634 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2635
2636 /* Free the CR0 register. */
2637 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2638
2639 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2640 }
2641 else
2642 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2643
2644 return off;
2645}
2646
2647
2648#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2649 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2650
2651/**
2652 * Emits code to check if a \#MF exception should be raised.
2653 *
2654 * @returns New code buffer offset, UINT32_MAX on failure.
2655 * @param pReNative The native recompile state.
2656 * @param off The code buffer offset.
2657 * @param idxInstr The current instruction.
2658 */
2659DECL_INLINE_THROW(uint32_t)
2660iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2661{
2662 /*
2663 * Make sure we don't have any outstanding guest register writes as we may
2664 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2665 */
2666 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2667 off = iemNativeRegFlushPendingWrites(pReNative, off);
2668
2669#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2670 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2671#else
2672 RT_NOREF(idxInstr);
2673#endif
2674
2675 /* Allocate a temporary FSW register. */
2676 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2677 kIemNativeGstRegUse_ReadOnly);
2678
2679 /*
2680 * if ((FSW & X86_FSW_ES) != 0)
2681 * return raisexcpt();
2682 */
2683 /* Test and jump. */
2684 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2685
2686 /* Free but don't flush the FSW register. */
2687 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2688
2689 return off;
2690}
2691
2692
2693#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2694 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2695
2696/**
2697 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2698 *
2699 * @returns New code buffer offset, UINT32_MAX on failure.
2700 * @param pReNative The native recompile state.
2701 * @param off The code buffer offset.
2702 * @param idxInstr The current instruction.
2703 */
2704DECL_INLINE_THROW(uint32_t)
2705iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2706{
2707 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2708
2709 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2710 {
2711 /*
2712 * Make sure we don't have any outstanding guest register writes as we may
2713 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2714 */
2715 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2716 off = iemNativeRegFlushPendingWrites(pReNative, off);
2717
2718#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2719 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2720#else
2721 RT_NOREF(idxInstr);
2722#endif
2723
2724 /* Allocate a temporary CR0 and CR4 register. */
2725 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2726 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2727 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2728
2729 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2730#ifdef RT_ARCH_AMD64
2731 /*
2732 * We do a modified test here:
2733 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2734 * else { goto RaiseSseRelated; }
2735 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2736 * all targets except the 386, which doesn't support SSE anyway, so this
2737 * should be a safe assumption.
2738 */
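        /* Editor's worked example of the test above: with CR4.OSFXSR=1 and
           CR0.EM=CR0.TS=0 the value accumulated in idxTmpReg is X86_CR4_OSFXSR, so
           the final XOR yields zero and no exit is taken; clearing OSFXSR or setting
           EM/TS leaves a non-zero value and the RaiseSseRelated exit is taken. */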
2739 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2740 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2741 //pCodeBuf[off++] = 0xcc;
2742 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2743 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2744 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2745 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2746 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2747 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2748
2749#elif defined(RT_ARCH_ARM64)
2750 /*
2751 * We do a modified test here:
2752 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2753 * else { goto RaiseSseRelated; }
2754 */
2755 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2756 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2757 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2758 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2759 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2760 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2761 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2762 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2763 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2764 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2765 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2766 idxTmpReg, false /*f64Bit*/);
2767
2768#else
2769# error "Port me!"
2770#endif
2771
2772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2773 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2774 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2775 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2776
2777 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2778 }
2779 else
2780 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2781
2782 return off;
2783}
2784
2785
2786#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2787 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2788
2789/**
2790 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2791 *
2792 * @returns New code buffer offset, UINT32_MAX on failure.
2793 * @param pReNative The native recompile state.
2794 * @param off The code buffer offset.
2795 * @param idxInstr The current instruction.
2796 */
2797DECL_INLINE_THROW(uint32_t)
2798iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2799{
2800 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2801
2802 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2803 {
2804 /*
2805 * Make sure we don't have any outstanding guest register writes as we may
2806 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2807 */
2808 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2809 off = iemNativeRegFlushPendingWrites(pReNative, off);
2810
2811#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2812 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2813#else
2814 RT_NOREF(idxInstr);
2815#endif
2816
2817 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2818 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2819 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2820 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2821 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2822
2823 /*
2824 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2825 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2826 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2827 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2828 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2829 * { likely }
2830 * else { goto RaiseAvxRelated; }
2831 */
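        /* Editor's worked example for the AMD64 variant below: the value assembled in
           idxTmpReg is ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2) | (CR4.OSXSAVE << 1) | CR0.TS.
           With SSE+YMM enabled in XCR0, OSXSAVE set and TS clear this is 0x18 | 0x02 | 0 = 0x1a,
           so XORing with 0x1a gives zero and the RaiseAvxRelated exit is not taken. */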
2832#ifdef RT_ARCH_AMD64
2833 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2834 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2835 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2836 ^ 0x1a) ) { likely }
2837 else { goto RaiseAvxRelated; } */
2838 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2839 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2840 //pCodeBuf[off++] = 0xcc;
2841 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2842 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2843 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2844 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2845 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2846 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2847 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2848 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2849 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2850 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2851 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2852
2853#elif defined(RT_ARCH_ARM64)
2854 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2855 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2856 else { goto RaiseAvxRelated; } */
2857 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2858 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2859 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2860 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2861 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2862 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2863 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2864 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2865 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2866 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2867 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2868 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2869 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2870 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2871 idxTmpReg, false /*f64Bit*/);
2872
2873#else
2874# error "Port me!"
2875#endif
2876
2877 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2878 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2879 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2880 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2881
2882 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2883 }
2884 else
2885 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2886
2887 return off;
2888}
2889
2890
2891#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2892 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2893
2894/**
2895 * Emits code to raise a \#DE if a local variable is zero.
2896 *
2897 * @returns New code buffer offset, UINT32_MAX on failure.
2898 * @param pReNative The native recompile state.
2899 * @param off The code buffer offset.
2900 * @param idxVar The variable to check. This must be 32-bit.
2901 * @param idxInstr The current instruction.
2902 */
2903DECL_INLINE_THROW(uint32_t)
2904iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2905{
2906 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2907 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2908
2909 /* Make sure we don't have any outstanding guest register writes as we may raise a #DE. */
2910 off = iemNativeRegFlushPendingWrites(pReNative, off);
2911
2912 /* Set the instruction number if we're counting. */
2913#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2914 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2915#else
2916 RT_NOREF(idxInstr);
2917#endif
2918
2919 /* Do the job we're here for. */
2920 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2921 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2922 iemNativeVarRegisterRelease(pReNative, idxVar);
2923
2924 return off;
2925}
2926
2927
2928#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2929 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2930
2931/**
2932 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2933 *
2934 * @returns New code buffer offset, UINT32_MAX on failure.
2935 * @param pReNative The native recompile state.
2936 * @param off The code buffer offset.
2937 * @param idxInstr The current instruction.
2938 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2939 * @param cbAlign The alignment in bytes to check against.
2940 */
2941DECL_INLINE_THROW(uint32_t)
2942iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2943 uint8_t idxVarEffAddr, uint8_t cbAlign)
2944{
2945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2946 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2947
2948 /*
2949 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2950 */
2951 off = iemNativeRegFlushPendingWrites(pReNative, off);
2952
2953#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2954 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2955#else
2956 RT_NOREF(idxInstr);
2957#endif
2958
2959 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2960 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
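    /* Editor's illustration: cbAlign is assumed to be a power of two, so for
       cbAlign == 16 this tests the low four address bits; e.g. an effective
       address of 0x1008 takes the RaiseGp0 exit while 0x1010 passes. */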
2961 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2962
2963 return off;
2964}
2965
2966
2967/*********************************************************************************************************************************
2968* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2969*********************************************************************************************************************************/
2970
2971/**
2972 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2973 *
2974 * @returns Pointer to the condition stack entry on success; throws
2975 * VERR_IEM_COND_TOO_DEEPLY_NESTED on failure (too many nestings).
2976 */
2977DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2978{
2979 uint32_t const idxStack = pReNative->cCondDepth;
2980 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2981
2982 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2983 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2984
2985 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2986 pEntry->fInElse = false;
2987 pEntry->fIfExitTb = false;
2988 pEntry->fElseExitTb = false;
2989 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2990 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2991
2992 return pEntry;
2993}
2994
2995
2996/**
2997 * Start of the if-block, snapshotting the register and variable state.
2998 */
2999DECL_INLINE_THROW(void)
3000iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3001{
3002 Assert(offIfBlock != UINT32_MAX);
3003 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3004 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3005 Assert(!pEntry->fInElse);
3006
3007 /* Define the start of the IF block if requested or for disassembly purposes. */
3008 if (idxLabelIf != UINT32_MAX)
3009 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3010#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3011 else
3012 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3013#else
3014 RT_NOREF(offIfBlock);
3015#endif
3016
3017 /* Copy the initial state so we can restore it in the 'else' block. */
3018 pEntry->InitialState = pReNative->Core;
3019}
3020
3021
3022#define IEM_MC_ELSE() } while (0); \
3023 off = iemNativeEmitElse(pReNative, off); \
3024 do {
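/* Editor's note: the '} while (0);' / 'do {' pairs in IEM_MC_ELSE and IEM_MC_ENDIF
 * merely keep the C source of the recompiler functions syntactically balanced with
 * the 'do {' opened by the IEM_MC_IF_XXX macros; the actual branching is done in
 * the emitted native code via the else/endif labels managed by the condition stack. */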
3025
3026/** Emits code related to IEM_MC_ELSE. */
3027DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3028{
3029 /* Check sanity and get the conditional stack entry. */
3030 Assert(off != UINT32_MAX);
3031 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3032 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3033 Assert(!pEntry->fInElse);
3034
3035 /* We can skip the dirty register flushing and the jump to the endif label if
3036 the if-branch already jumped to a TB exit. */
3037 if (!pEntry->fIfExitTb)
3038 {
3039#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3040 /* Writeback any dirty shadow registers. */
3041 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3042 * in one of the branches and leave guest registers already dirty before the start of the if
3043 * block alone. */
3044 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3045#endif
3046
3047 /* Jump to the endif. */
3048 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3049 }
3050# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3051 else
3052 Assert(pReNative->Core.offPc == 0);
3053# endif
3054
3055 /* Define the else label and enter the else part of the condition. */
3056 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3057 pEntry->fInElse = true;
3058
3059 /* Snapshot the core state so we can do a merge at the endif and restore
3060 the snapshot we took at the start of the if-block. */
3061 pEntry->IfFinalState = pReNative->Core;
3062 pReNative->Core = pEntry->InitialState;
3063
3064 return off;
3065}
3066
3067
3068#define IEM_MC_ENDIF() } while (0); \
3069 off = iemNativeEmitEndIf(pReNative, off)
3070
3071/** Emits code related to IEM_MC_ENDIF. */
3072DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3073{
3074 /* Check sanity and get the conditional stack entry. */
3075 Assert(off != UINT32_MAX);
3076 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3077 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3078
3079#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3080 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3081#endif
3082
3083 /*
3084 * If either of the branches exited the TB, we can take the state from the
3085 * other branch and skip all the merging headache.
3086 */
3087 bool fDefinedLabels = false;
3088 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3089 {
3090#ifdef VBOX_STRICT
3091 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3092 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3093 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3094 ? &pEntry->IfFinalState : &pReNative->Core;
3095# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3096 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3097# endif
3098# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3099 Assert(pExitCoreState->offPc == 0);
3100# endif
3101 RT_NOREF(pExitCoreState);
3102#endif
3103
3104 if (!pEntry->fIfExitTb)
3105 {
3106 Assert(pEntry->fInElse);
3107 pReNative->Core = pEntry->IfFinalState;
3108 }
3109 }
3110 else
3111 {
3112 /*
3113 * Now we have to find common ground with the core state at the end of the
3114 * if-block. Use the smallest common denominator and just drop anything
3115 * that isn't the same in both states.
3116 */
3117 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3118 * which is why we're doing this at the end of the else-block.
3119 * But we'd need more info about the future for that to be worth the effort. */
3120 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3121#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3122 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3123 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3124 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3125#endif
3126
3127 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3128 {
3129#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3130 /*
3131 * If the branches differ in dirty shadow registers, we flush those that are
3132 * only dirty in the current branch here and take care of those only dirty
3133 * in the other branch via the tail flushing code further down.
3134 */
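            /* Editor's illustration: a guest register whose shadow is dirty only in
               the current branch is written back right here (the 'head' set), while
               one dirty only in the other branch ends up in fGstRegDirtyTail and is
               flushed by the tail code emitted further down. */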
3135 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3136 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3137 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3138 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3139 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3140 if (!fGstRegDirtyDiff)
3141 { /* likely */ }
3142 else
3143 {
3144 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3145 if (fGstRegDirtyHead)
3146 {
3147 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3148 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3149 }
3150 }
3151#endif
3152
3153 /*
3154 * Shadowed guest registers.
3155 *
3156 * We drop any shadows where the two states disagree about where
3157 * things are kept. We may end up flushing more dirty registers
3158 * here, if the two branches keep things in different registers.
3159 */
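            /* Editor's illustration (AMD64 host): if one branch left guest EAX
               shadowed by host rbx and the other by host rcx, the association is
               dropped from the merged state here so no stale shadowing survives
               the endif. */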
3160 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3161 if (fGstRegs)
3162 {
3163 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3164 do
3165 {
3166 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3167 fGstRegs &= ~RT_BIT_64(idxGstReg);
3168
3169 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3170 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3171 if ( idxCurHstReg != idxOtherHstReg
3172 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3173 {
3174#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3175 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3176 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3177 idxOtherHstReg, pOther->bmGstRegShadows));
3178#else
3179 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3180 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3181 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3182 idxOtherHstReg, pOther->bmGstRegShadows,
3183 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3184 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3185 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3186 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3187 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3188#endif
3189 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3190 }
3191 } while (fGstRegs);
3192 }
3193 else
3194 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3195
3196#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3197 /*
3198 * Generate jumpy code for flushing dirty registers from the other
3199 * branch that aren't dirty in the current one.
3200 */
3201 if (!fGstRegDirtyTail)
3202 { /* likely */ }
3203 else
3204 {
3205 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3206 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3207
3208 /* First the current branch has to jump over the dirty flushing from the other branch. */
3209 uint32_t const offFixup1 = off;
3210 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3211
3212 /* Put the endif and maybe else label here so the other branch ends up here. */
3213 if (!pEntry->fInElse)
3214 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3215 else
3216 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3217 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3218 fDefinedLabels = true;
3219
3220 /* Flush the dirty guest registers from the other branch. */
3221 while (fGstRegDirtyTail)
3222 {
3223 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3224 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3225 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3226 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3227 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3228
3229 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3230
3231 /* Mismatching shadowing should've been dropped in the previous step already. */
3232 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3233 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3234 }
3235
3236 /* Here is the actual endif label, fixup the above jump to land here. */
3237 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3238 }
3239#endif
3240
3241 /*
3242 * Check variables next. For now we must require them to be identical
3243 * or stuff we can recreate. (No code is emitted here.)
3244 */
3245 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3246#ifdef VBOX_STRICT
3247 uint32_t const offAssert = off;
3248#endif
3249 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3250 if (fVars)
3251 {
3252 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3253 do
3254 {
3255 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3256 fVars &= ~RT_BIT_32(idxVar);
3257
3258 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3259 {
3260 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3261 continue;
3262 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3263 {
3264 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3265 if (idxHstReg != UINT8_MAX)
3266 {
3267 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3268 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3269 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3270 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3271 }
3272 continue;
3273 }
3274 }
3275 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3276 continue;
3277
3278 /* Irreconcilable, so drop it. */
3279 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3280 if (idxHstReg != UINT8_MAX)
3281 {
3282 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3283 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3284 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3285 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3286 }
3287 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3288 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3289 } while (fVars);
3290 }
3291 Assert(off == offAssert);
3292
3293 /*
3294 * Finally, check that the host register allocations match.
3295 */
3296 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3297 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3298 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3299 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3300 }
3301 }
3302
3303 /*
3304 * Define the endif label and maybe the else one if we're still in the 'if' part.
3305 */
3306 if (!fDefinedLabels)
3307 {
3308 if (!pEntry->fInElse)
3309 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3310 else
3311 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3312 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3313 }
3314
3315 /* Pop the conditional stack. */
3316 pReNative->cCondDepth -= 1;
3317
3318 return off;
3319}
3320
3321
3322/**
3323 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3324 *
3325 * The compiler should be able to figure this out at compile time, so sprinkling
3326 * constexpr wherever possible here to nudge it along.
3327 */
3328template<uint32_t const a_fEfl>
3329RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3330{
3331 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3332 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3333 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3334 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3335 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3336 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3337 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3338}
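/* Editor's usage illustration: iemNativeEflagsToLivenessMask<X86_EFL_CF | X86_EFL_ZF>()
 * folds at compile time to RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF),
 * while any non-status bit in the input contributes the IEMLIVENESSBIT_IDX_EFL_OTHER bit instead. */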
3339
3340
3341/**
3342 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3343 *
3344 * The compiler should be able to figure this out at compile time, so sprinkling
3345 * constexpr wherever possible here to nudge it along.
3346 */
3347template<uint32_t const a_fEfl>
3348RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3349{
3350 AssertCompile( a_fEfl == X86_EFL_CF
3351 || a_fEfl == X86_EFL_PF
3352 || a_fEfl == X86_EFL_AF
3353 || a_fEfl == X86_EFL_ZF
3354 || a_fEfl == X86_EFL_SF
3355 || a_fEfl == X86_EFL_OF
3356 || a_fEfl == X86_EFL_DF);
3357 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3358 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3359 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3360 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3361 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3362 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3363 : X86_EFL_DF_BIT;
3364}
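/* Editor's usage illustration: iemNativeEflagsToSingleBitNo<X86_EFL_ZF>() evaluates to
 * X86_EFL_ZF_BIT (6), letting the IEM_MC_IF_FLAGS_BIT_SET/NOT_SET emitters test a single
 * bit; anything other than the listed single-flag values trips the AssertCompile. */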
3365
3366
3367#define IEM_MC_IF_FLAGS_ANY_BITS_SET(a_fBits) \
3368 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3369 do {
3370
3371/** Emits code for IEM_MC_IF_FLAGS_ANY_BITS_SET. */
3372DECL_INLINE_THROW(uint32_t)
3373iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3374{
3375 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3376 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3377 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3378
3379 /* Get the eflags. */
3380 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3381
3382 /* Test and jump. */
3383 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3384
3385 /* Free but don't flush the EFlags register. */
3386 iemNativeRegFreeTmp(pReNative, idxEflReg);
3387
3388 /* Make a copy of the core state now as we start the if-block. */
3389 iemNativeCondStartIfBlock(pReNative, off);
3390
3391 return off;
3392}
3393
3394
3395#define IEM_MC_IF_FLAGS_NO_BITS_SET(a_fBits) \
3396 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3397 do {
3398
3399/** Emits code for IEM_MC_IF_FLAGS_NO_BITS_SET. */
3400DECL_INLINE_THROW(uint32_t)
3401iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3402{
3403 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3404 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3405 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3406
3407 /* Get the eflags. */
3408 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3409
3410 /* Test and jump. */
3411 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3412
3413 /* Free but don't flush the EFlags register. */
3414 iemNativeRegFreeTmp(pReNative, idxEflReg);
3415
3416 /* Make a copy of the core state now as we start the if-block. */
3417 iemNativeCondStartIfBlock(pReNative, off);
3418
3419 return off;
3420}
3421
3422
3423#define IEM_MC_IF_FLAGS_BIT_SET(a_fBit) \
3424 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3425 iemNativeEflagsToLivenessMask<a_fBit>()); \
3426 do {
3427
3428/** Emits code for IEM_MC_IF_FLAGS_BIT_SET. */
3429DECL_INLINE_THROW(uint32_t)
3430iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3431{
3432 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3433 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3434 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3435
3436 /* Get the eflags. */
3437 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3438
3439 /* Test and jump. */
3440 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3441
3442 /* Free but don't flush the EFlags register. */
3443 iemNativeRegFreeTmp(pReNative, idxEflReg);
3444
3445 /* Make a copy of the core state now as we start the if-block. */
3446 iemNativeCondStartIfBlock(pReNative, off);
3447
3448 return off;
3449}
3450
3451
3452#define IEM_MC_IF_FLAGS_BIT_NOT_SET(a_fBit) \
3453 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3454 iemNativeEflagsToLivenessMask<a_fBit>()); \
3455 do {
3456
3457/** Emits code for IEM_MC_IF_FLAGS_BIT_NOT_SET. */
3458DECL_INLINE_THROW(uint32_t)
3459iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3460{
3461 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3462 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3463 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3464
3465 /* Get the eflags. */
3466 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3467
3468 /* Test and jump. */
3469 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3470
3471 /* Free but don't flush the EFlags register. */
3472 iemNativeRegFreeTmp(pReNative, idxEflReg);
3473
3474 /* Make a copy of the core state now as we start the if-block. */
3475 iemNativeCondStartIfBlock(pReNative, off);
3476
3477 return off;
3478}
3479
3480
3481#define IEM_MC_IF_FLAGS_BITS_EQ(a_fBit1, a_fBit2) \
3482 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3483 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3484 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3485 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3486 do {
3487
3488#define IEM_MC_IF_FLAGS_BITS_NE(a_fBit1, a_fBit2) \
3489 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3490 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3491 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3492 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3493 do {
3494
3495/** Emits code for IEM_MC_IF_FLAGS_BITS_EQ and IEM_MC_IF_FLAGS_BITS_NE. */
3496DECL_INLINE_THROW(uint32_t)
3497iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3498 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3499{
3500 Assert(iBitNo1 != iBitNo2);
3501 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3502 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3503 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3504
3505 /* Get the eflags. */
3506 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3507
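    /* Editor's note on the trick used below: the flag bit at iBitNo1 is isolated,
       shifted so it lines up with iBitNo2 and XORed back into EFLAGS, so bit iBitNo2
       of idxTmpReg is set exactly when the two flags differ.  E.g. for SF (bit 7) and
       OF (bit 11), as used by signed 'less than' conditions, SF=1/OF=0 leaves bit 11
       set and the 'not equal' path is taken. */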
3508#ifdef RT_ARCH_AMD64
3509 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3510
3511 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3512 if (iBitNo1 > iBitNo2)
3513 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3514 else
3515 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3516 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3517
3518#elif defined(RT_ARCH_ARM64)
3519 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3520 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3521
3522 /* and tmpreg, eflreg, #1<<iBitNo1 */
3523 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3524
3525 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3526 if (iBitNo1 > iBitNo2)
3527 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3528 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3529 else
3530 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3531 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3532
3533 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3534
3535#else
3536# error "Port me"
3537#endif
3538
3539 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3540 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3541 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3542
3543 /* Free but don't flush the EFlags and tmp registers. */
3544 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3545 iemNativeRegFreeTmp(pReNative, idxEflReg);
3546
3547 /* Make a copy of the core state now as we start the if-block. */
3548 iemNativeCondStartIfBlock(pReNative, off);
3549
3550 return off;
3551}
3552
3553
3554#define IEM_MC_IF_FLAGS_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3555 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3556 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3557 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3558 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3559 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3560 do {
3561
3562#define IEM_MC_IF_FLAGS_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3563 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3564 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3565 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3566 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3567 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3568 do {
3569
3570/** Emits code for IEM_MC_IF_FLAGS_BIT_NOT_SET_AND_BITS_EQ and
3571 * IEM_MC_IF_FLAGS_BIT_SET_OR_BITS_NE. */
3572DECL_INLINE_THROW(uint32_t)
3573iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3574 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3575{
3576 Assert(iBitNo1 != iBitNo);
3577 Assert(iBitNo2 != iBitNo);
3578 Assert(iBitNo2 != iBitNo1);
3579 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3580 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3581 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3582
3583 /* We need an if-block label for the inverted variant. */
3584 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3585 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3586
3587 /* Get the eflags. */
3588 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3589
3590#ifdef RT_ARCH_AMD64
3591 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3592#elif defined(RT_ARCH_ARM64)
3593 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3594#endif
3595
3596 /* Check for the lone bit first. */
3597 if (!fInverted)
3598 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3599 else
3600 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3601
3602 /* Then extract and compare the other two bits. */
3603#ifdef RT_ARCH_AMD64
3604 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3605 if (iBitNo1 > iBitNo2)
3606 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3607 else
3608 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3609 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3610
3611#elif defined(RT_ARCH_ARM64)
3612 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3613
3614 /* and tmpreg, eflreg, #1<<iBitNo1 */
3615 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3616
3617 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3618 if (iBitNo1 > iBitNo2)
3619 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3620 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3621 else
3622 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3623 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3624
3625 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3626
3627#else
3628# error "Port me"
3629#endif
3630
3631 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3632 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3633 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3634
3635 /* Free but don't flush the EFlags and tmp registers. */
3636 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3637 iemNativeRegFreeTmp(pReNative, idxEflReg);
3638
3639 /* Make a copy of the core state now as we start the if-block. */
3640 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3641
3642 return off;
3643}
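/*
 * Illustrative note (not lifted from the instruction tables): with ZF as the
 * lone bit and SF/OF as the pair, the non-inverted form is the x86 'greater'
 * test (ZF clear and SF == OF) and the inverted form the 'less or equal' test,
 * so a Jcc/SETcc/CMOVcc style body would presumably decode into something like:
 *
 *      IEM_MC_IF_FLAGS_BIT_NOT_SET_AND_BITS_EQ(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
 *          ... taken path ...
 *      } IEM_MC_ELSE() {
 *          ... not-taken path ...
 *      } IEM_MC_ENDIF();
 *
 * The trailing 'do {' opened by the macros above is closed again by
 * IEM_MC_ELSE() / IEM_MC_ENDIF() (defined elsewhere in this file); the emitter
 * here only pushes the condition entry and starts the if-block.
 */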
3644
3645
3646#define IEM_MC_IF_CX_IS_NZ() \
3647 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3648 do {
3649
3650/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3651DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3652{
3653 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3654
3655 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3656 kIemNativeGstRegUse_ReadOnly);
3657 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3658 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3659
3660 iemNativeCondStartIfBlock(pReNative, off);
3661 return off;
3662}
3663
3664
3665#define IEM_MC_IF_ECX_IS_NZ() \
3666 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3667 do {
3668
3669#define IEM_MC_IF_RCX_IS_NZ() \
3670 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3671 do {
3672
3673/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3674DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3675{
3676 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3677
3678 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3679 kIemNativeGstRegUse_ReadOnly);
3680 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3681 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3682
3683 iemNativeCondStartIfBlock(pReNative, off);
3684 return off;
3685}
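/*
 * Note: the CX vs ECX vs RCX variants mirror the x86 rule that the counter
 * width follows the effective address size (16/32/64-bit), which is why the
 * microcode has separate IEM_MC_IF_CX/ECX/RCX_IS_NZ statements rather than a
 * single parameterized one.
 */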
3686
3687
3688#define IEM_MC_IF_CX_IS_NOT_ONE() \
3689 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3690 do {
3691
3692/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3693DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3694{
3695 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3696
3697 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3698 kIemNativeGstRegUse_ReadOnly);
3699#ifdef RT_ARCH_AMD64
3700 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3701#else
3702 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3703 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3704 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3705#endif
3706 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3707
3708 iemNativeCondStartIfBlock(pReNative, off);
3709 return off;
3710}
3711
3712
3713#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3714 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3715 do {
3716
3717#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3718 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3719 do {
3720
3721/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3722DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3723{
3724 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3725
3726 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3727 kIemNativeGstRegUse_ReadOnly);
3728 if (f64Bit)
3729 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3730 else
3731 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3732 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3733
3734 iemNativeCondStartIfBlock(pReNative, off);
3735 return off;
3736}
3737
3738
3739#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3740 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3741 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3742 iemNativeEflagsToLivenessMask<a_fBit>()); \
3743 do {
3744
3745#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3746 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3747 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3748 iemNativeEflagsToLivenessMask<a_fBit>()); \
3749 do {
3750
3751/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3752 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3753DECL_INLINE_THROW(uint32_t)
3754iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3755 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3756{
3757 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3758 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3759 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3760
3761 /* We have to load both RCX and EFLAGS before we can start branching,
3762 otherwise we'll end up in the else-block with an inconsistent
3763 register allocator state.
3764 Doing EFLAGS first as it's more likely to be loaded, right? */
3765 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3766 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3767 kIemNativeGstRegUse_ReadOnly);
3768
3769 /** @todo we could reduce this to a single branch instruction by spending a
3770 * temporary register and some setnz stuff. Not sure if loops are
3771 * worth it. */
3772 /* Check CX. */
3773#ifdef RT_ARCH_AMD64
3774 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3775#else
3776 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3777 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3778 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3779#endif
3780
3781 /* Check the EFlags bit. */
3782 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3783 !fCheckIfSet /*fJmpIfSet*/);
3784
3785 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3786 iemNativeRegFreeTmp(pReNative, idxEflReg);
3787
3788 iemNativeCondStartIfBlock(pReNative, off);
3789 return off;
3790}
3791
3792
3793#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3794 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3795 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3796 iemNativeEflagsToLivenessMask<a_fBit>()); \
3797 do {
3798
3799#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3800 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3801 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3802 iemNativeEflagsToLivenessMask<a_fBit>()); \
3803 do {
3804
3805#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3806 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3807 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3808 iemNativeEflagsToLivenessMask<a_fBit>()); \
3809 do {
3810
3811#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3812 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3813 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3814 iemNativeEflagsToLivenessMask<a_fBit>()); \
3815 do {
3816
3817/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3818 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3819 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3820 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3821DECL_INLINE_THROW(uint32_t)
3822iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3823 unsigned iBitNo, uint64_t fLivenessEFlBit)
3824
3825{
3826 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3827 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3828 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3829
3830 /* We have to load both RCX and EFLAGS before we can start branching,
3831 otherwise we'll end up in the else-block with an inconsistent
3832 register allocator state.
3833 Doing EFLAGS first as it's more likely to be loaded, right? */
3834 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3835 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3836 kIemNativeGstRegUse_ReadOnly);
3837
3838 /** @todo we could reduce this to a single branch instruction by spending a
3839 * temporary register and some setnz stuff. Not sure if loops are
3840 * worth it. */
3841 /* Check RCX/ECX. */
3842 if (f64Bit)
3843 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3844 else
3845 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3846
3847 /* Check the EFlags bit. */
3848 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3849 !fCheckIfSet /*fJmpIfSet*/);
3850
3851 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3852 iemNativeRegFreeTmp(pReNative, idxEflReg);
3853
3854 iemNativeCondStartIfBlock(pReNative, off);
3855 return off;
3856}
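/*
 * Note: the combined 'counter is not one' + EFLAGS bit checks match the
 * LOOPE/LOOPNE style conditions. Checking for 'not one' rather than 'not zero'
 * presumably reflects that the counter decrement is emitted separately by the
 * MC block, so testing the pre-decrement value against 1 is equivalent to
 * testing the post-decrement value against 0.
 */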
3857
3858
3859#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3860 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3861 do {
3862
3863/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3864DECL_INLINE_THROW(uint32_t)
3865iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3866{
3867 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3868
3869 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3870 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3871 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3872 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3873
3874 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3875
3876 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3877
3878 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3879
3880 iemNativeCondStartIfBlock(pReNative, off);
3881 return off;
3882}
3883
3884
3885#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3886 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3887 do {
3888
3889/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3890DECL_INLINE_THROW(uint32_t)
3891iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3892{
3893 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3894 Assert(iGReg < 16);
3895
3896 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3897 kIemNativeGstRegUse_ReadOnly);
3898
3899 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3900
3901 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3902
3903 iemNativeCondStartIfBlock(pReNative, off);
3904 return off;
3905}
3906
3907
3908
3909/*********************************************************************************************************************************
3910* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3911*********************************************************************************************************************************/
3912
3913#define IEM_MC_NOREF(a_Name) \
3914 RT_NOREF_PV(a_Name)
3915
3916#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3917 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3918
3919#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3920 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3921
3922#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3923 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3924
3925#define IEM_MC_LOCAL(a_Type, a_Name) \
3926 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3927
3928#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3929 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3930
3931#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3932 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3933
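/*
 * Illustrative only (the variable names below are made up): in the threaded
 * microcode these macros expand to recompiler variable allocations, e.g.
 *
 *      IEM_MC_ARG(uint8_t, iEffSeg, 0);
 *      IEM_MC_LOCAL(RTGCPTR, GCPtrEff);
 *      IEM_MC_ARG_LOCAL_REF(PRTGCPTR, pGCPtrEff, GCPtrEff, 1);
 *
 * so each a_Name ends up holding a (packed) variable index rather than the
 * value itself; the value lives in a host register and/or stack slot managed
 * by the variable code.
 */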
3934
3935/**
3936 * Sets the host register for @a idxVarRc to @a idxReg.
3937 *
 3938 * Any guest register shadowing will be implicitly dropped by this call.
3939 *
3940 * The variable must not have any register associated with it (causes
3941 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3942 * implied.
3943 *
3944 * @returns idxReg
3945 * @param pReNative The recompiler state.
3946 * @param idxVar The variable.
3947 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3948 * @param off For recording in debug info.
3949 * @param fAllocated Set if the register is already allocated, false if not.
3950 *
3951 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3952 */
3953DECL_INLINE_THROW(uint8_t)
3954iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3955{
3956 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3957 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3958 Assert(!pVar->fRegAcquired);
3959 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3960 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3961 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3962 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3963
3964 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3965 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3966
3967 iemNativeVarSetKindToStack(pReNative, idxVar);
3968 pVar->idxReg = idxReg;
3969
3970 return idxReg;
3971}
3972
3973
3974/**
 3975 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
3976 */
3977DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3978 uint8_t idxReg, uint32_t *poff)
3979{
3980 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3981 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3982 return idxReg;
3983}
3984
3985
3986/**
3987 * This is called by IEM_MC_END() to clean up all variables.
3988 */
3989DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3990{
3991 uint32_t const bmVars = pReNative->Core.bmVars;
3992 if (bmVars != 0)
3993 iemNativeVarFreeAllSlow(pReNative, bmVars);
3994 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3995 Assert(pReNative->Core.bmStack == 0);
3996}
3997
3998
3999#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4000
4001/**
4002 * This is called by IEM_MC_FREE_LOCAL.
4003 */
4004DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4005{
4006 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4007 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4008 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4009}
4010
4011
4012#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4013
4014/**
4015 * This is called by IEM_MC_FREE_ARG.
4016 */
4017DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4018{
4019 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4020 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4021 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4022}
4023
4024
4025#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4026
4027/**
4028 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4029 */
4030DECL_INLINE_THROW(uint32_t)
4031iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4032{
4033 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4034 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4035 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4036 Assert( pVarDst->cbVar == sizeof(uint16_t)
4037 || pVarDst->cbVar == sizeof(uint32_t));
4038
4039 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4040 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4041 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4042 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4043 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4044
4045 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4046
4047 /*
4048 * Special case for immediates.
4049 */
4050 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4051 {
4052 switch (pVarDst->cbVar)
4053 {
4054 case sizeof(uint16_t):
4055 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4056 break;
4057 case sizeof(uint32_t):
4058 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4059 break;
4060 default: AssertFailed(); break;
4061 }
4062 }
4063 else
4064 {
4065 /*
4066 * The generic solution for now.
4067 */
4068 /** @todo optimize this by having the python script make sure the source
4069 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4070 * statement. Then we could just transfer the register assignments. */
4071 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4072 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4073 switch (pVarDst->cbVar)
4074 {
4075 case sizeof(uint16_t):
4076 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4077 break;
4078 case sizeof(uint32_t):
4079 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4080 break;
4081 default: AssertFailed(); break;
4082 }
4083 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4084 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4085 }
4086 return off;
4087}
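/*
 * Usage sketch (hypothetical variable names): narrowing a wider, initialized
 * local into a freshly allocated, smaller destination variable, e.g.
 *
 *      IEM_MC_LOCAL(uint64_t, u64Value);
 *      ...
 *      IEM_MC_LOCAL(uint32_t, u32Value);
 *      IEM_MC_ASSIGN_TO_SMALLER(u32Value, u64Value);
 *
 * The destination must still be of kind 'Invalid' and strictly smaller than
 * the source, as the asserts above spell out.
 */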
4088
4089
4090
4091/*********************************************************************************************************************************
4092* Emitters for IEM_MC_CALL_CIMPL_XXX *
4093*********************************************************************************************************************************/
4094
4095/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4096DECL_INLINE_THROW(uint32_t)
4097iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4098 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4099
4100{
4101 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4102 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4103
4104 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
 4105 when a call clobbers any of the relevant control registers. */
4106#if 1
4107 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4108 {
4109 /* Likely as long as call+ret are done via cimpl. */
4110 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4111 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4112 }
4113 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4114 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4115 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4116 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4117 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4118 else
4119 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4120 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4121 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4122
4123#else
4124 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4125 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4126 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4127 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4128 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4129 || pfnCImpl == (uintptr_t)iemCImpl_callf
4130 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4131 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4132 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4133 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4134 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4135#endif
4136
4137#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4138 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4139 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4140 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4141#endif
4142
4143 /*
4144 * Do all the call setup and cleanup.
4145 */
4146 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4147
4148 /*
4149 * Load the two or three hidden arguments.
4150 */
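    /* Concretely (from the loads below): the hidden arguments are pVCpu and the
       instruction length; in the Windows configuration where VBOXSTRICTRC is
       returned by reference, a pointer to a frame slot receiving the strict
       status code goes first and is reloaded into xAX right after the call. */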
4151#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4152 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
4153 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4154 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4155#else
4156 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4157 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4158#endif
4159
4160 /*
4161 * Make the call and check the return code.
4162 *
4163 * Shadow PC copies are always flushed here, other stuff depends on flags.
 4164 * Segment and general purpose registers are explicitly flushed via the
4165 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4166 * macros.
4167 */
4168 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4169#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4170 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
4171#endif
4172 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4173 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4174 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4175 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4176
4177#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4178 pReNative->Core.fDebugPcInitialized = false;
4179 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4180#endif
4181
4182 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4183}
4184
4185
4186#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4187 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4188
4189/** Emits code for IEM_MC_CALL_CIMPL_1. */
4190DECL_INLINE_THROW(uint32_t)
4191iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4192 uintptr_t pfnCImpl, uint8_t idxArg0)
4193{
4194 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4195 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4196}
4197
4198
4199#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4200 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4201
4202/** Emits code for IEM_MC_CALL_CIMPL_2. */
4203DECL_INLINE_THROW(uint32_t)
4204iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4205 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4206{
4207 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4208 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4209 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4210}
4211
4212
4213#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4214 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4215 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4216
4217/** Emits code for IEM_MC_CALL_CIMPL_3. */
4218DECL_INLINE_THROW(uint32_t)
4219iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4220 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4221{
4222 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4223 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4224 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4225 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4226}
4227
4228
4229#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4230 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4231 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4232
4233/** Emits code for IEM_MC_CALL_CIMPL_4. */
4234DECL_INLINE_THROW(uint32_t)
4235iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4236 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4237{
4238 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4239 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4240 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4241 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4242 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4243}
4244
4245
4246#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4247 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4248 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4249
 4250/** Emits code for IEM_MC_CALL_CIMPL_5. */
4251DECL_INLINE_THROW(uint32_t)
4252iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4253 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4254{
4255 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4256 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4257 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4258 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4260 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4261}
4262
4263
4264/** Recompiler debugging: Flush guest register shadow copies. */
4265#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4266
4267
4268
4269/*********************************************************************************************************************************
4270* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4271*********************************************************************************************************************************/
4272
4273/**
4274 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4275 */
4276DECL_INLINE_THROW(uint32_t)
4277iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4278 uintptr_t pfnAImpl, uint8_t cArgs)
4279{
4280 if (idxVarRc != UINT8_MAX)
4281 {
4282 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4283 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4284 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4285 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4286 }
4287
4288 /*
4289 * Do all the call setup and cleanup.
4290 *
4291 * It is only required to flush pending guest register writes in call volatile registers as
 4292 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
 4293 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
4294 * no matter the fFlushPendingWrites parameter.
4295 */
4296 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4297
4298 /*
4299 * Make the call and update the return code variable if we've got one.
4300 */
4301 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4302 if (idxVarRc != UINT8_MAX)
4303 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4304
4305 return off;
4306}
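/*
 * Note: unlike the CImpl path above, assembly helpers get no hidden arguments
 * and no status propagation via iemNativeEmitCheckCallRetAndPassUp(); when the
 * MC code wants a return value it is simply taken from IEMNATIVE_CALL_RET_GREG
 * and bound to the result variable by iemNativeVarRegisterSet() above.
 */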
4307
4308
4309
4310#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4311 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4312
4313#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4314 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4315
4316/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4317DECL_INLINE_THROW(uint32_t)
4318iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4319{
4320 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4321}
4322
4323
4324#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4325 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4326
4327#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4328 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4329
4330/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4331DECL_INLINE_THROW(uint32_t)
4332iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4333{
4334 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4335 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4336}
4337
4338
4339#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4340 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4341
4342#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4343 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4344
4345/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4346DECL_INLINE_THROW(uint32_t)
4347iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4348 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4349{
4350 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4352 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4353}
4354
4355
4356#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4357 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4358
4359#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4360 IEM_MC_LOCAL(a_rcType, a_rc); \
4361 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4362
4363/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4364DECL_INLINE_THROW(uint32_t)
4365iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4366 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4367{
4368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4369 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4370 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4371 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4372}
4373
4374
4375#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4376 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4377
4378#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4379 IEM_MC_LOCAL(a_rcType, a_rc); \
4380 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4381
4382/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4383DECL_INLINE_THROW(uint32_t)
4384iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4385 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4386{
4387 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4388 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4389 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4390 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4391 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4392}
4393
4394
4395
4396/*********************************************************************************************************************************
4397* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4398*********************************************************************************************************************************/
4399
4400#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4401 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4402
4403#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4404 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4405
4406#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4407 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4408
4409#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4410 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4411
4412
4413/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4414 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4415DECL_INLINE_THROW(uint32_t)
4416iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4417{
4418 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4419 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4420 Assert(iGRegEx < 20);
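    /* iGRegEx encoding: 0..15 selects the low byte of the corresponding GPR, while
       16..19 selects AH/CH/DH/BH (the high byte of the first four GPRs); hence the
       '& 15' masking and the Gpr8Hi load below. */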
4421
4422 /* Same discussion as in iemNativeEmitFetchGregU16 */
4423 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4424 kIemNativeGstRegUse_ReadOnly);
4425
4426 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4427 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4428
4429 /* The value is zero-extended to the full 64-bit host register width. */
4430 if (iGRegEx < 16)
4431 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4432 else
4433 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4434
4435 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4436 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4437 return off;
4438}
4439
4440
4441#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4442 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4443
4444#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4445 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4446
4447#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4448 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4449
4450/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4451DECL_INLINE_THROW(uint32_t)
4452iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4453{
4454 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4455 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4456 Assert(iGRegEx < 20);
4457
4458 /* Same discussion as in iemNativeEmitFetchGregU16 */
4459 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4460 kIemNativeGstRegUse_ReadOnly);
4461
4462 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4463 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4464
4465 if (iGRegEx < 16)
4466 {
4467 switch (cbSignExtended)
4468 {
4469 case sizeof(uint16_t):
4470 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4471 break;
4472 case sizeof(uint32_t):
4473 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4474 break;
4475 case sizeof(uint64_t):
4476 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4477 break;
4478 default: AssertFailed(); break;
4479 }
4480 }
4481 else
4482 {
4483 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4484 switch (cbSignExtended)
4485 {
4486 case sizeof(uint16_t):
4487 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4488 break;
4489 case sizeof(uint32_t):
4490 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4491 break;
4492 case sizeof(uint64_t):
4493 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4494 break;
4495 default: AssertFailed(); break;
4496 }
4497 }
4498
4499 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4500 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4501 return off;
4502}
4503
4504
4505
4506#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4507 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4508
4509#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4510 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4511
4512#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4513 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4514
4515/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4516DECL_INLINE_THROW(uint32_t)
4517iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4518{
4519 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4520 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4521 Assert(iGReg < 16);
4522
4523 /*
4524 * We can either just load the low 16-bit of the GPR into a host register
4525 * for the variable, or we can do so via a shadow copy host register. The
4526 * latter will avoid having to reload it if it's being stored later, but
4527 * will waste a host register if it isn't touched again. Since we don't
 4528 * know what's going to happen, we choose the latter for now.
4529 */
4530 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4531 kIemNativeGstRegUse_ReadOnly);
4532
4533 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4534 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4535 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4536 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4537
4538 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4539 return off;
4540}
4541
4542#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4543 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4544
4545/** Emits code for IEM_MC_FETCH_GREG_I16. */
4546DECL_INLINE_THROW(uint32_t)
4547iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4548{
4549 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4550 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4551 Assert(iGReg < 16);
4552
4553 /*
4554 * We can either just load the low 16-bit of the GPR into a host register
4555 * for the variable, or we can do so via a shadow copy host register. The
4556 * latter will avoid having to reload it if it's being stored later, but
4557 * will waste a host register if it isn't touched again. Since we don't
 4558 * know what's going to happen, we choose the latter for now.
4559 */
4560 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4561 kIemNativeGstRegUse_ReadOnly);
4562
4563 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4564 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4565#ifdef RT_ARCH_AMD64
4566 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4567#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4568 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4569#endif
4570 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4571
4572 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4573 return off;
4574}
4575
4576
4577#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4578 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4579
4580#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4581 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4582
4583/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4584DECL_INLINE_THROW(uint32_t)
4585iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4586{
4587 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4588 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4589 Assert(iGReg < 16);
4590
4591 /*
4592 * We can either just load the low 16-bit of the GPR into a host register
4593 * for the variable, or we can do so via a shadow copy host register. The
4594 * latter will avoid having to reload it if it's being stored later, but
4595 * will waste a host register if it isn't touched again. Since we don't
 4596 * know what's going to happen, we choose the latter for now.
4597 */
4598 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4599 kIemNativeGstRegUse_ReadOnly);
4600
4601 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4602 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4603 if (cbSignExtended == sizeof(uint32_t))
4604 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4605 else
4606 {
4607 Assert(cbSignExtended == sizeof(uint64_t));
4608 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4609 }
4610 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4611
4612 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4613 return off;
4614}
4615
4616
4617#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4618 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4619
4620#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4621 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4622
4623#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4624 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4625
 4626/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4629{
4630 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4631 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4632 Assert(iGReg < 16);
4633
4634 /*
 4635 * We can either just load the low 32-bit of the GPR into a host register
4636 * for the variable, or we can do so via a shadow copy host register. The
4637 * latter will avoid having to reload it if it's being stored later, but
4638 * will waste a host register if it isn't touched again. Since we don't
 4639 * know what's going to happen, we choose the latter for now.
4640 */
4641 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4642 kIemNativeGstRegUse_ReadOnly);
4643
4644 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4646 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4647 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4648
4649 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4650 return off;
4651}
4652
4653
4654#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4655 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4656
 4657/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4658DECL_INLINE_THROW(uint32_t)
4659iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4660{
4661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4663 Assert(iGReg < 16);
4664
4665 /*
4666 * We can either just load the low 32-bit of the GPR into a host register
4667 * for the variable, or we can do so via a shadow copy host register. The
4668 * latter will avoid having to reload it if it's being stored later, but
4669 * will waste a host register if it isn't touched again. Since we don't
 4670 * know what's going to happen, we choose the latter for now.
4671 */
4672 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4673 kIemNativeGstRegUse_ReadOnly);
4674
4675 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4676 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4677 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4678 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4679
4680 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4681 return off;
4682}
4683
4684
4685#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4686 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4687
4688#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4689 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4690
4691/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4692 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4693DECL_INLINE_THROW(uint32_t)
4694iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4695{
4696 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4697 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4698 Assert(iGReg < 16);
4699
4700 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4701 kIemNativeGstRegUse_ReadOnly);
4702
4703 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4704 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4705 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4706 /** @todo name the register a shadow one already? */
4707 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4708
4709 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4710 return off;
4711}
4712
4713
4714#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4715 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4716
4717/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4718DECL_INLINE_THROW(uint32_t)
4719iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4720{
4721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4723 Assert(iGRegLo < 16 && iGRegHi < 16);
4724
4725 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4726 kIemNativeGstRegUse_ReadOnly);
4727 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4728 kIemNativeGstRegUse_ReadOnly);
4729
4730 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4731 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4732 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4733 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4734
4735 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4736 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4737 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4738 return off;
4739}
4740
4741
4742/*********************************************************************************************************************************
4743* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4744*********************************************************************************************************************************/
4745
4746#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4747 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4748
4749/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4750DECL_INLINE_THROW(uint32_t)
4751iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4752{
4753 Assert(iGRegEx < 20);
4754 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4755 kIemNativeGstRegUse_ForUpdate);
4756#ifdef RT_ARCH_AMD64
4757 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4758
4759 /* To the lowest byte of the register: mov r8, imm8 */
4760 if (iGRegEx < 16)
4761 {
4762 if (idxGstTmpReg >= 8)
4763 pbCodeBuf[off++] = X86_OP_REX_B;
4764 else if (idxGstTmpReg >= 4)
4765 pbCodeBuf[off++] = X86_OP_REX;
4766 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4767 pbCodeBuf[off++] = u8Value;
4768 }
4769 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
4770 else if (idxGstTmpReg < 4)
4771 {
4772 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4773 pbCodeBuf[off++] = u8Value;
4774 }
4775 else
4776 {
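        /* The destination is a high-byte register (AH/CH/DH/BH), but the host
           register shadowing it has no directly addressable 15:8 byte -- the x86
           encoding only offers high-byte forms for the first four registers and
           loses them entirely once a REX prefix is needed -- so rotate bits 15:8
           down into the low byte, write it, and rotate back. */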
4777 /* ror reg64, 8 */
4778 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4779 pbCodeBuf[off++] = 0xc1;
4780 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4781 pbCodeBuf[off++] = 8;
4782
4783 /* mov reg8, imm8 */
4784 if (idxGstTmpReg >= 8)
4785 pbCodeBuf[off++] = X86_OP_REX_B;
4786 else if (idxGstTmpReg >= 4)
4787 pbCodeBuf[off++] = X86_OP_REX;
4788 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4789 pbCodeBuf[off++] = u8Value;
4790
4791 /* rol reg64, 8 */
4792 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4793 pbCodeBuf[off++] = 0xc1;
4794 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4795 pbCodeBuf[off++] = 8;
4796 }
4797
4798#elif defined(RT_ARCH_ARM64)
4799 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4800 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4801 if (iGRegEx < 16)
4802 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4803 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4804 else
4805 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4806 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4807 iemNativeRegFreeTmp(pReNative, idxImmReg);
4808
4809#else
4810# error "Port me!"
4811#endif
4812
4813 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4814
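    /* Without delayed register writeback the updated shadow register must be
       stored straight back into the guest context here; with it the shadow
       presumably just stays dirty and is flushed later by the register allocator. */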
4815#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4816 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4817#endif
4818
4819 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4820 return off;
4821}
4822
4823
4824#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4825 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4826
4827/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4828DECL_INLINE_THROW(uint32_t)
4829iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4830{
4831 Assert(iGRegEx < 20);
4832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4833
4834 /*
4835 * If it's a constant value (unlikely) we treat this as a
4836 * IEM_MC_STORE_GREG_U8_CONST statement.
4837 */
4838 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4839 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4840 { /* likely */ }
4841 else
4842 {
4843 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4844 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4845 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4846 }
4847
4848 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4849 kIemNativeGstRegUse_ForUpdate);
4850 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
4851
4852#ifdef RT_ARCH_AMD64
4853 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4854 if (iGRegEx < 16)
4855 {
4856 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4857 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4858 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4859 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4860 pbCodeBuf[off++] = X86_OP_REX;
4861 pbCodeBuf[off++] = 0x8a;
4862 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4863 }
4864 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4865 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4866 {
4867 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4868 pbCodeBuf[off++] = 0x8a;
4869 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4870 }
4871 else
4872 {
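/* Same ror/mov/rol trick as in iemNativeEmitStoreGregU8Const: bring the target byte
   down into bits 7:0, overwrite it from the source register, then rotate it back. */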
4873 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4874
4875 /* ror reg64, 8 */
4876 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4877 pbCodeBuf[off++] = 0xc1;
4878 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4879 pbCodeBuf[off++] = 8;
4880
4881 /* mov reg8, reg8(r/m) */
4882 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4883 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4884 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4885 pbCodeBuf[off++] = X86_OP_REX;
4886 pbCodeBuf[off++] = 0x8a;
4887 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4888
4889 /* rol reg64, 8 */
4890 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4891 pbCodeBuf[off++] = 0xc1;
4892 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4893 pbCodeBuf[off++] = 8;
4894 }
4895
4896#elif defined(RT_ARCH_ARM64)
4897 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4898 or
4899 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4900 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4901 if (iGRegEx < 16)
4902 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4903 else
4904 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4905
4906#else
4907# error "Port me!"
4908#endif
4909 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4910
4911 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4912
4913#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4914 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4915#endif
4916 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4917 return off;
4918}
4919
4920
4921
4922#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4923 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4924
4925/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4926DECL_INLINE_THROW(uint32_t)
4927iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4928{
4929 Assert(iGReg < 16);
4930 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4931 kIemNativeGstRegUse_ForUpdate);
4932#ifdef RT_ARCH_AMD64
4933 /* mov reg16, imm16 */
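/* Illustration (the actual host register is up to the allocator): with the guest
   register in r9 and uValue = 0x1234 this emits 66 41 B9 34 12, i.e. mov r9w, 0x1234. */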
4934 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4935 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4936 if (idxGstTmpReg >= 8)
4937 pbCodeBuf[off++] = X86_OP_REX_B;
4938 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4939 pbCodeBuf[off++] = RT_BYTE1(uValue);
4940 pbCodeBuf[off++] = RT_BYTE2(uValue);
4941
4942#elif defined(RT_ARCH_ARM64)
4943 /* movk xdst, #uValue, lsl #0 */
4944 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4945 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
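/* MOVK only replaces bits 15:0 and leaves the rest of the register alone, which
   matches the x86 semantics of writing a 16-bit GPR. */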
4946
4947#else
4948# error "Port me!"
4949#endif
4950
4951 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4952
4953#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4954 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4955#endif
4956 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4957 return off;
4958}
4959
4960
4961#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4962 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4963
4964/** Emits code for IEM_MC_STORE_GREG_U16. */
4965DECL_INLINE_THROW(uint32_t)
4966iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4967{
4968 Assert(iGReg < 16);
4969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4970
4971 /*
4972 * If it's a constant value (unlikely) we treat this as an
4973 * IEM_MC_STORE_GREG_U16_CONST statement.
4974 */
4975 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4976 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4977 { /* likely */ }
4978 else
4979 {
4980 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4981 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4982 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4983 }
4984
4985 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4986 kIemNativeGstRegUse_ForUpdate);
4987
4988#ifdef RT_ARCH_AMD64
4989 /* mov reg16, reg16 or [mem16] */
4990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4991 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4992 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4993 {
4994 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4995 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4996 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4997 pbCodeBuf[off++] = 0x8b;
4998 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4999 }
5000 else
5001 {
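/* The value variable has been spilled, so read its low 16 bits directly from the
   stack slot (rBP relative) instead of loading it into a register first. */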
5002 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5003 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5004 if (idxGstTmpReg >= 8)
5005 pbCodeBuf[off++] = X86_OP_REX_R;
5006 pbCodeBuf[off++] = 0x8b;
5007 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5008 }
5009
5010#elif defined(RT_ARCH_ARM64)
5011 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5012 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
5013 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5014 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5015 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5016
5017#else
5018# error "Port me!"
5019#endif
5020
5021 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5022
5023#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5024 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5025#endif
5026 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5027 return off;
5028}
5029
5030
5031#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5032 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5033
5034/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5035DECL_INLINE_THROW(uint32_t)
5036iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5037{
5038 Assert(iGReg < 16);
5039 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5040 kIemNativeGstRegUse_ForFullWrite);
5041 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5042#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5043 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5044#endif
5045 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5046 return off;
5047}
5048
5049
5050#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5051 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5052
5053#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5054 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5055
5056/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5057DECL_INLINE_THROW(uint32_t)
5058iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5059{
5060 Assert(iGReg < 16);
5061 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5062
5063 /*
5064 * If it's a constant value (unlikely) we treat this as an
5065 * IEM_MC_STORE_GREG_U32_CONST statement.
5066 */
5067 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5068 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5069 { /* likely */ }
5070 else
5071 {
5072 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5073 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5074 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5075 }
5076
5077 /*
5078 * For the rest we allocate a guest register for the variable and write
5079 * it to the CPUMCTX structure.
5080 */
5081 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5082#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5083 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5084#else
5085 RT_NOREF(idxVarReg);
5086#endif
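/* A 32-bit GPR write zero-extends to 64 bits, so strict builds verify that the
   upper half of the value register is indeed clear. */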
5087#ifdef VBOX_STRICT
5088 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5089#endif
5090 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5091 return off;
5092}
5093
5094
5095#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5096 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5097
5098/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5099DECL_INLINE_THROW(uint32_t)
5100iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5101{
5102 Assert(iGReg < 16);
5103 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5104 kIemNativeGstRegUse_ForFullWrite);
5105 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5106#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5107 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5108#endif
5109 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5110 return off;
5111}
5112
5113
5114#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5115 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5116
5117#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5118 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5119
5120/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5121DECL_INLINE_THROW(uint32_t)
5122iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5123{
5124 Assert(iGReg < 16);
5125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5126
5127 /*
5128 * If it's a constant value (unlikely) we treat this as an
5129 * IEM_MC_STORE_GREG_U64_CONST statement.
5130 */
5131 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5132 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5133 { /* likely */ }
5134 else
5135 {
5136 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5137 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5138 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5139 }
5140
5141 /*
5142 * For the rest we allocate a guest register for the variable and write
5143 * it to the CPUMCTX structure.
5144 */
5145 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5146#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5147 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5148#else
5149 RT_NOREF(idxVarReg);
5150#endif
5151 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5152 return off;
5153}
5154
5155
5156#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5157 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5158
5159/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5160DECL_INLINE_THROW(uint32_t)
5161iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5162{
5163 Assert(iGReg < 16);
5164 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5165 kIemNativeGstRegUse_ForUpdate);
5166 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5167#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5168 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5169#endif
5170 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5171 return off;
5172}
5173
5174
5175#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5176 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5177
5178/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5179DECL_INLINE_THROW(uint32_t)
5180iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5181{
5182 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5183 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5184 Assert(iGRegLo < 16 && iGRegHi < 16);
5185
5186 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5187 kIemNativeGstRegUse_ForFullWrite);
5188 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5189 kIemNativeGstRegUse_ForFullWrite);
5190
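/* Copy the two halves of the 128-bit variable into the destination pair:
   qword 0 goes to iGRegLo and qword 1 to iGRegHi. */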
5191 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5192 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5193 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5194 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5195
5196 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5197 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5198 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5199 return off;
5200}
5201
5202
5203/*********************************************************************************************************************************
5204* General purpose register manipulation (add, sub). *
5205*********************************************************************************************************************************/
5206
5207#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
5208 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
5209
5210/** Emits code for IEM_MC_ADD_GREG_U16. */
5211DECL_INLINE_THROW(uint32_t)
5212iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5213{
5214 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5215 kIemNativeGstRegUse_ForUpdate);
5216
5217#ifdef RT_ARCH_AMD64
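/* A 0x66-prefixed inc/add only modifies the low word of the register, matching the
   guest semantics of a 16-bit GPR write (bits 63:16 are left untouched). */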
5218 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5219 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5220 if (idxGstTmpReg >= 8)
5221 pbCodeBuf[off++] = X86_OP_REX_B;
5222 if (uAddend == 1)
5223 {
5224 pbCodeBuf[off++] = 0xff; /* inc */
5225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5226 }
5227 else
5228 {
5229 pbCodeBuf[off++] = 0x81;
5230 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5231 pbCodeBuf[off++] = uAddend;
5232 pbCodeBuf[off++] = 0;
5233 }
5234
5235#else
5236 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5237 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5238
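/* AArch64 has no 16-bit add that preserves bits 63:16, so do the addition in a
   temporary and bit-field-insert the low 16 bits back into the guest register copy. */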
5239 /* add tmp, gstgrp, uAddend */
5240 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5241
5242 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5243 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5244
5245 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5246#endif
5247
5248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5249
5250#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5251 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5252#endif
5253
5254 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5255 return off;
5256}
5257
5258
5259#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5260 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5261
5262#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5263 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5264
5265/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5266DECL_INLINE_THROW(uint32_t)
5267iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5268{
5269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5270 kIemNativeGstRegUse_ForUpdate);
5271
5272#ifdef RT_ARCH_AMD64
5273 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5274 if (f64Bit)
5275 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5276 else if (idxGstTmpReg >= 8)
5277 pbCodeBuf[off++] = X86_OP_REX_B;
5278 if (uAddend == 1)
5279 {
5280 pbCodeBuf[off++] = 0xff; /* inc */
5281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5282 }
5283 else if (uAddend < 128)
5284 {
5285 pbCodeBuf[off++] = 0x83; /* add */
5286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5287 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5288 }
5289 else
5290 {
5291 pbCodeBuf[off++] = 0x81; /* add */
5292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5293 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5294 pbCodeBuf[off++] = 0;
5295 pbCodeBuf[off++] = 0;
5296 pbCodeBuf[off++] = 0;
5297 }
5298
5299#else
5300 /* add gstgrp, gstgrp, uAddend */
5301 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5302 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5303
5304#endif
5305
5306 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5307
5308#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5309 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5310#endif
5311
5312 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5313 return off;
5314}
5315
5316
5317
5318#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5319 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5320
5321/** Emits code for IEM_MC_SUB_GREG_U16. */
5322DECL_INLINE_THROW(uint32_t)
5323iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5324{
5325 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5326 kIemNativeGstRegUse_ForUpdate);
5327
5328#ifdef RT_ARCH_AMD64
5329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5330 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5331 if (idxGstTmpReg >= 8)
5332 pbCodeBuf[off++] = X86_OP_REX_B;
5333 if (uSubtrahend == 1)
5334 {
5335 pbCodeBuf[off++] = 0xff; /* dec */
5336 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5337 }
5338 else
5339 {
5340 pbCodeBuf[off++] = 0x81;
5341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5342 pbCodeBuf[off++] = uSubtrahend;
5343 pbCodeBuf[off++] = 0;
5344 }
5345
5346#else
5347 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5348 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5349
5350 /* sub tmp, gstgrp, uSubtrahend */
5351 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5352
5353 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5354 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5355
5356 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5357#endif
5358
5359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5360
5361#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5362 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5363#endif
5364
5365 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5366 return off;
5367}
5368
5369
5370#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5371 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5372
5373#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5374 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5375
5376/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5377DECL_INLINE_THROW(uint32_t)
5378iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5379{
5380 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5381 kIemNativeGstRegUse_ForUpdate);
5382
5383#ifdef RT_ARCH_AMD64
5384 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5385 if (f64Bit)
5386 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5387 else if (idxGstTmpReg >= 8)
5388 pbCodeBuf[off++] = X86_OP_REX_B;
5389 if (uSubtrahend == 1)
5390 {
5391 pbCodeBuf[off++] = 0xff; /* dec */
5392 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5393 }
5394 else if (uSubtrahend < 128)
5395 {
5396 pbCodeBuf[off++] = 0x83; /* sub */
5397 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5398 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5399 }
5400 else
5401 {
5402 pbCodeBuf[off++] = 0x81; /* sub */
5403 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5404 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5405 pbCodeBuf[off++] = 0;
5406 pbCodeBuf[off++] = 0;
5407 pbCodeBuf[off++] = 0;
5408 }
5409
5410#else
5411 /* sub gstgrp, gstgrp, uSubtrahend */
5412 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5413 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5414
5415#endif
5416
5417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5418
5419#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5420 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5421#endif
5422
5423 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5424 return off;
5425}
5426
5427
5428#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5429 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5430
5431#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5432 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5433
5434#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5435 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5436
5437#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5438 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5439
5440/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5441DECL_INLINE_THROW(uint32_t)
5442iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5443{
5444#ifdef VBOX_STRICT
5445 switch (cbMask)
5446 {
5447 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5448 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5449 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5450 case sizeof(uint64_t): break;
5451 default: AssertFailedBreak();
5452 }
5453#endif
5454
5455 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5456 kIemNativeGstRegUse_ForUpdate);
5457
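/* Narrow masks are widened with all-ones so the AND leaves the bits outside the
   operand size untouched; e.g. an 8-bit mask of 0x0f becomes an AND with
   0xffffffffffffff0f. The 32-bit case deliberately uses a 32-bit AND, which clears
   bits 63:32 just like the corresponding guest instruction would. */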
5458 switch (cbMask)
5459 {
5460 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5461 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5462 break;
5463 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5464 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5465 break;
5466 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5467 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5468 break;
5469 case sizeof(uint64_t):
5470 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5471 break;
5472 default: AssertFailedBreak();
5473 }
5474
5475 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5476
5477#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5478 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5479#endif
5480
5481 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5482 return off;
5483}
5484
5485
5486#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5487 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5488
5489#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5490 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5491
5492#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5493 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5494
5495#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5496 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5497
5498/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5499DECL_INLINE_THROW(uint32_t)
5500iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5501{
5502#ifdef VBOX_STRICT
5503 switch (cbMask)
5504 {
5505 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5506 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5507 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5508 case sizeof(uint64_t): break;
5509 default: AssertFailedBreak();
5510 }
5511#endif
5512
5513 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5514 kIemNativeGstRegUse_ForUpdate);
5515
5516 switch (cbMask)
5517 {
5518 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5519 case sizeof(uint16_t):
5520 case sizeof(uint64_t):
5521 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5522 break;
5523 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5524 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5525 break;
5526 default: AssertFailedBreak();
5527 }
5528
5529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5530
5531#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5533#endif
5534
5535 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5536 return off;
5537}
5538
5539
5540/*********************************************************************************************************************************
5541* Local/Argument variable manipulation (add, sub, and, or). *
5542*********************************************************************************************************************************/
5543
5544#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5545 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5546
5547#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5548 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5549
5550#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5551 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5552
5553#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5554 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5555
5556
5557#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5558 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5559
5560#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5561 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5562
5563#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5564 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5565
5566/** Emits code for AND'ing a local and a constant value. */
5567DECL_INLINE_THROW(uint32_t)
5568iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5569{
5570#ifdef VBOX_STRICT
5571 switch (cbMask)
5572 {
5573 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5574 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5575 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5576 case sizeof(uint64_t): break;
5577 default: AssertFailedBreak();
5578 }
5579#endif
5580
5581 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5582 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5583
5584 if (cbMask <= sizeof(uint32_t))
5585 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5586 else
5587 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5588
5589 iemNativeVarRegisterRelease(pReNative, idxVar);
5590 return off;
5591}
5592
5593
5594#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5595 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5596
5597#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5598 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5599
5600#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5601 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5602
5603#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5604 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5605
5606/** Emits code for OR'ing a local and a constant value. */
5607DECL_INLINE_THROW(uint32_t)
5608iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5609{
5610#ifdef VBOX_STRICT
5611 switch (cbMask)
5612 {
5613 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5614 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5615 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5616 case sizeof(uint64_t): break;
5617 default: AssertFailedBreak();
5618 }
5619#endif
5620
5621 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5622 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5623
5624 if (cbMask <= sizeof(uint32_t))
5625 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5626 else
5627 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5628
5629 iemNativeVarRegisterRelease(pReNative, idxVar);
5630 return off;
5631}
5632
5633
5634#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5635 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5636
5637#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5638 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5639
5640#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5641 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5642
5643/** Emits code for reversing the byte order in a local value. */
5644DECL_INLINE_THROW(uint32_t)
5645iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5646{
5647 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5648 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5649
5650 switch (cbLocal)
5651 {
5652 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5653 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5654 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5655 default: AssertFailedBreak();
5656 }
5657
5658 iemNativeVarRegisterRelease(pReNative, idxVar);
5659 return off;
5660}
5661
5662
5663#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5664 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5665
5666#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5667 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5668
5669#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5670 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5671
5672/** Emits code for shifting left a local value. */
5673DECL_INLINE_THROW(uint32_t)
5674iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5675{
5676#ifdef VBOX_STRICT
5677 switch (cbLocal)
5678 {
5679 case sizeof(uint8_t): Assert(cShift < 8); break;
5680 case sizeof(uint16_t): Assert(cShift < 16); break;
5681 case sizeof(uint32_t): Assert(cShift < 32); break;
5682 case sizeof(uint64_t): Assert(cShift < 64); break;
5683 default: AssertFailedBreak();
5684 }
5685#endif
5686
5687 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5688 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5689
5690 if (cbLocal <= sizeof(uint32_t))
5691 {
5692 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5693 if (cbLocal < sizeof(uint32_t))
5694 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5695 cbLocal == sizeof(uint16_t)
5696 ? UINT32_C(0xffff)
5697 : UINT32_C(0xff));
5698 }
5699 else
5700 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5701
5702 iemNativeVarRegisterRelease(pReNative, idxVar);
5703 return off;
5704}
5705
5706
5707#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5708 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5709
5710#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5711 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5712
5713#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5714 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5715
5716/** Emits code for arithmetically shifting a local value right. */
5717DECL_INLINE_THROW(uint32_t)
5718iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5719{
5720#ifdef VBOX_STRICT
5721 switch (cbLocal)
5722 {
5723 case sizeof(int8_t): Assert(cShift < 8); break;
5724 case sizeof(int16_t): Assert(cShift < 16); break;
5725 case sizeof(int32_t): Assert(cShift < 32); break;
5726 case sizeof(int64_t): Assert(cShift < 64); break;
5727 default: AssertFailedBreak();
5728 }
5729#endif
5730
5731 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5732 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5733
5734 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5735 if (cbLocal == sizeof(uint8_t))
5736 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5737 else if (cbLocal == sizeof(uint16_t))
5738 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5739
5740 if (cbLocal <= sizeof(uint32_t))
5741 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5742 else
5743 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5744
5745 iemNativeVarRegisterRelease(pReNative, idxVar);
5746 return off;
5747}
5748
5749
5750#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5751 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5752
5753#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5754 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5755
5756#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5757 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5758
5759/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5760DECL_INLINE_THROW(uint32_t)
5761iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5762{
5763 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5764 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5765 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5766 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5767
5768 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5769 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquireInited(pReNative, idxVarEffAddr, &off);
5770
5771 /* Need to sign extend the value. */
5772 if (cbLocal <= sizeof(uint32_t))
5773 {
5774/** @todo ARM64: In case of boredom, the extended add instruction can do the
5775 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5776 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5777
5778 switch (cbLocal)
5779 {
5780 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5781 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5782 default: AssertFailed();
5783 }
5784
5785 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5786 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5787 }
5788 else
5789 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5790
5791 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5792 iemNativeVarRegisterRelease(pReNative, idxVar);
5793 return off;
5794}
5795
5796
5797
5798/*********************************************************************************************************************************
5799* EFLAGS *
5800*********************************************************************************************************************************/
5801
5802#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5803# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5804#else
5805# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5806 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5807
5808DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5809{
5810 if (fEflOutput)
5811 {
5812 PVMCPUCC const pVCpu = pReNative->pVCpu;
5813# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5814 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5815 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5816 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5817# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5818 if (fEflOutput & (a_fEfl)) \
5819 { \
5820 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5821 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5822 else \
5823 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5824 } else do { } while (0)
5825# else
5826 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5827 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5828 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5829# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5830 if (fEflOutput & (a_fEfl)) \
5831 { \
5832 if (LivenessClobbered.a_fLivenessMember) \
5833 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5834 else if (LivenessDelayable.a_fLivenessMember) \
5835 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5836 else \
5837 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5838 } else do { } while (0)
5839# endif
5840 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5841 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5842 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5843 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5844 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5845 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5846 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5847# undef CHECK_FLAG_AND_UPDATE_STATS
5848 }
5849 RT_NOREF(fEflInput);
5850}
5851#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5852
5853#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5854#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5855 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5856 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5857
5858/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5859template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5860 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5861DECL_INLINE_THROW(uint32_t)
5862iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5863{
5864 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5865 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5866 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5867
5868#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5869# ifdef VBOX_STRICT
5870 if ( pReNative->idxCurCall != 0
5871 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5872 {
5873 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5874 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5875# define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
5876 AssertMsg( !(fBoth & (a_fEflConst)) \
5877 || (!(a_fEflInput & (a_fEflConst)) \
5878 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5879 : !(a_fEflOutput & (a_fEflConst)) \
5880 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5881 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5882 ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5883 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5884 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5885 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5886 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5887 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5888 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5889 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5890# undef ASSERT_ONE_EFL
5891 }
5892# endif
5893#endif
5894
5895 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5896 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5897
5898 /** @todo This could be prettier...*/
5899 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5900 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5901 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5902 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5903 Assert(pVar->idxReg == UINT8_MAX);
5904 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5905 {
5906 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5907 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5908 * that's counterproductive... */
5909 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5910 a_fLivenessEflInput, a_fLivenessEflOutput);
5911 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5912 }
5913 else
5914 {
5915 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5916 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off);
5917 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5918 a_fLivenessEflInput, a_fLivenessEflOutput);
5919 if (idxGstReg != UINT8_MAX)
5920 {
5921 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5922 iemNativeRegFreeTmp(pReNative, idxGstReg);
5923 }
5924 else
5925 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxVarReg);
5926 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5927 }
5928 return off;
5929}
5930
5931
5932
5933/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5934 * start using it with custom native code emission (inlining assembly
5935 * instruction helpers). */
5936#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5937#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5938 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5939 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5940 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5941 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5942
5943#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5944#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5945 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5946 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5947 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5948 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5949
5950/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5951template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
5952 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5953DECL_INLINE_THROW(uint32_t)
5954iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
5955{
5956 uint8_t const idxReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarEFlags, &off);
5957 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5958
5959#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5960# ifdef VBOX_STRICT
5961 if ( pReNative->idxCurCall != 0
5962 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
5963 {
5964 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5965# define ASSERT_ONE_EFL(a_idxField) \
5966 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
5967 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
5968 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5969 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
5970 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5971 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
5972 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5973 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
5974 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
5975 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
5976 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
5977 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
5978 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
5979 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
5980# undef ASSERT_ONE_EFL
5981 }
5982# endif
5983#endif
5984
5985#ifdef VBOX_STRICT
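/* Sanity check the value being committed: trap with 0x2001 if the always-one
   reserved flag (bit 1) is clear, and with 0x2002 if any must-be-zero reserved
   bits are set. */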
5986 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5987 uint32_t offFixup = off;
5988 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5989 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5990 iemNativeFixupFixedJump(pReNative, offFixup, off);
5991
5992 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5993 offFixup = off;
5994 off = iemNativeEmitJzToFixed(pReNative, off, off);
5995 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5996 iemNativeFixupFixedJump(pReNative, offFixup, off);
5997
5998 /** @todo validate that only bits in the a_fEflOutput mask changed. */
5999#endif
6000
6001#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6002 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6003 {
6004 Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6005 if (pReNative->fSkippingEFlags)
6006 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6007 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6008 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6009 pReNative->fSkippingEFlags = 0;
6010 else
6011 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6012# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6013 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6014 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6015 else
6016 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6017 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6018# endif
6019 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6020 }
6021#endif
6022
6023 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6024 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxReg);
6025 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6026 return off;
6027}
6028
6029
6030typedef enum IEMNATIVEMITEFLOP
6031{
6032 kIemNativeEmitEflOp_Set,
6033 kIemNativeEmitEflOp_Clear,
6034 kIemNativeEmitEflOp_Flip
6035} IEMNATIVEMITEFLOP;
6036
6037#define IEM_MC_SET_EFL_BIT(a_fBit) \
6038 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6039
6040#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6041 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6042
6043#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6044 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6045
6046/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6047template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6048DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6049{
6050 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6051 a_enmOp == kIemNativeEmitEflOp_Flip
6052 ? a_fLivenessEflBit : 0,
6053 a_fLivenessEflBit);
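/* Only the flip operation consumes the current flag value; set and clear overwrite
   it regardless, hence the empty input liveness mask for those two. */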
6054
6055 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6056 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6057 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6058 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6059 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6060 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6061 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6062 else
6063 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6064 || a_enmOp == kIemNativeEmitEflOp_Clear
6065 || a_enmOp == kIemNativeEmitEflOp_Flip);
6066
6067 /** @todo No delayed writeback for EFLAGS right now. */
6068 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
6069
6070 /* Free but don't flush the EFLAGS register. */
6071 iemNativeRegFreeTmp(pReNative, idxEflReg);
6072
6073#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6074 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6075 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6076 && (a_fEflBit & X86_EFL_STATUS_BITS))
6077 {
6078 if (pReNative->fSkippingEFlags)
6079 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6080 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6081 pReNative->fSkippingEFlags &= ~a_fEflBit;
6082# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6083 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6084# endif
6085 }
6086#endif
6087
6088 return off;
6089}
6090
6091
6092/*********************************************************************************************************************************
6093* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6094*********************************************************************************************************************************/
6095
6096#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6097 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6098
6099#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6100 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6101
6102#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6103 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6104
6105
6106/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6107 * IEM_MC_FETCH_SREG_ZX_U64. */
6108DECL_INLINE_THROW(uint32_t)
6109iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6110{
6111 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6112 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6113 Assert(iSReg < X86_SREG_COUNT);
6114
6115 /*
6116 * For now, we will not create a shadow copy of a selector. The rationale
6117 * is that since we do not recompile the popping and loading of segment
6118 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for
6119 * pushing and moving to registers, there is only a small chance that the
6120 * shadow copy will be accessed again before the register is reloaded. One
6121 * scenario would be nested calls in 16-bit code, but I doubt it's worth
6122 * the extra register pressure atm.
6123 *
6124 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6125 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6126 * store scenario covered at present (r160730).
6127 */
6128 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6129 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6130 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6131 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6132 return off;
6133}
6134
6135
6136
6137/*********************************************************************************************************************************
6138* Register references. *
6139*********************************************************************************************************************************/
6140
6141#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6142 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6143
6144#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6145 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6146
6147/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6148DECL_INLINE_THROW(uint32_t)
6149iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6150{
6151 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6152 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6153 Assert(iGRegEx < 20);
6154
6155 if (iGRegEx < 16)
6156 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6157 else
6158 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6159
6160 /* If we've delayed writing back the register value, flush it now. */
6161 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGRegEx & 15);
6162
6163 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6164 if (!fConst)
6165 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6166
6167 return off;
6168}
6169
6170#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6171 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6172
6173#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6174 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6175
6176#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6177 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6178
6179#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6180 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6181
6182#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6183 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6184
6185#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6186 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6187
6188#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6189 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6190
6191#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6192 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6193
6194#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6195 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6196
6197#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6198 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6199
6200/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6201DECL_INLINE_THROW(uint32_t)
6202iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6203{
6204 Assert(iGReg < 16);
6205 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6207
6208 /* If we've delayed writing back the register value, flush it now. */
6209 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGReg);
6210
6211 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6212 if (!fConst)
6213 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6214
6215 return off;
6216}
6217
6218
6219#undef IEM_MC_REF_EFLAGS /* should not be used. */
6220#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6221 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6222 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6223
6224/** Handles IEM_MC_REF_EFLAGS. */
6225template<uint32_t const a_fEflOutput>
6226DECL_INLINE_THROW(uint32_t)
6227iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6228{
6229 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6230 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6231
6232#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6233 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6234 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6235 if (pReNative->fSkippingEFlags)
6236 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6237 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6238 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6239# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6240
6241 /* Updating the skipping according to the outputs is a little early, but
6242 we don't have any other hooks for references atm. */
6243 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6244 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6245 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6246 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6247 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6248# endif
6249
6250 /* This ASSUMES that EFLAGS references are not taken before use. */
6251 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6252
6253#endif
6254 RT_NOREF(fEflInput);
6255
6256 /* If we've delayed writing back the register value, flush it now. */
6257 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_EFlags>(pReNative, off, 0);
6258
6259 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6260 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6261
6262 return off;
6263}
6264
6265
6266/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6267 * different code from the threaded recompiler, maybe it would be helpful. For now
6268 * we assume the threaded recompiler catches any incorrect EFLAGS declarations.
6269#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6270
6271
6272#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6273 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6274
6275#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6276 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6277
6278#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6279 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6280
6281#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6282 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6283
6284/* Just being paranoid here. */
6285#ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6286AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6287AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6288AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6289AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6290#endif
6291AssertCompileMemberOffset(X86XMMREG, au64, 0);
6292AssertCompileMemberOffset(X86XMMREG, au32, 0);
6293AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6294AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6295
6296#define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6297 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6298#define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6299 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6300#define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6301 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6302#define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6303 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6304
6305/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6306DECL_INLINE_THROW(uint32_t)
6307iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6308{
6309 Assert(iXReg < 16);
6310 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6311 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6312
6313 /* If we've delayed writing back the register value, flush it now. */
6314 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_XReg>(pReNative, off, iXReg);
6315
6316 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6317 if (!fConst)
6318 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6319
6320 return off;
6321}
6322
6323
6324
6325/*********************************************************************************************************************************
6326* Effective Address Calculation *
6327*********************************************************************************************************************************/
6328#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6329 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6330
6331/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6332 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6333DECL_INLINE_THROW(uint32_t)
6334iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6335 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6336{
6337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6338
6339 /*
6340 * Handle the disp16 form with no registers first.
6341 *
6342 * Convert to an immediate value, as that'll delay the register allocation
6343 * and assignment till the memory access / call / whatever and we can use
6344 * a more appropriate register (or none at all).
6345 */
6346 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6347 {
6348 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6349 return off;
6350 }
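    /* Worked example (illustrative): "mov cx, [1234h]" encodes ModRM 0x0E
       (mod=00, rm=110), so the operand folds to the constant 0x1234 above and
       no guest register needs to be allocated for it at all. */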
6351
6352 /* Determine the displacement. */
6353 uint16_t u16EffAddr;
6354 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6355 {
6356 case 0: u16EffAddr = 0; break;
6357 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6358 case 2: u16EffAddr = u16Disp; break;
6359 default: AssertFailedStmt(u16EffAddr = 0);
6360 }
6361
6362 /* Determine the registers involved. */
6363 uint8_t idxGstRegBase;
6364 uint8_t idxGstRegIndex;
6365 switch (bRm & X86_MODRM_RM_MASK)
6366 {
6367 case 0:
6368 idxGstRegBase = X86_GREG_xBX;
6369 idxGstRegIndex = X86_GREG_xSI;
6370 break;
6371 case 1:
6372 idxGstRegBase = X86_GREG_xBX;
6373 idxGstRegIndex = X86_GREG_xDI;
6374 break;
6375 case 2:
6376 idxGstRegBase = X86_GREG_xBP;
6377 idxGstRegIndex = X86_GREG_xSI;
6378 break;
6379 case 3:
6380 idxGstRegBase = X86_GREG_xBP;
6381 idxGstRegIndex = X86_GREG_xDI;
6382 break;
6383 case 4:
6384 idxGstRegBase = X86_GREG_xSI;
6385 idxGstRegIndex = UINT8_MAX;
6386 break;
6387 case 5:
6388 idxGstRegBase = X86_GREG_xDI;
6389 idxGstRegIndex = UINT8_MAX;
6390 break;
6391 case 6:
6392 idxGstRegBase = X86_GREG_xBP;
6393 idxGstRegIndex = UINT8_MAX;
6394 break;
6395#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6396 default:
6397#endif
6398 case 7:
6399 idxGstRegBase = X86_GREG_xBX;
6400 idxGstRegIndex = UINT8_MAX;
6401 break;
6402 }
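    /* Worked example (illustrative): "mov ax, [bp+si-10h]" encodes bRm=0x42
       (mod=01, rm=010) with u16Disp=0xF0, giving u16EffAddr=0xFFF0 (sign
       extended disp8), idxGstRegBase=xBP and idxGstRegIndex=xSI, so the code
       below computes (uint16_t)(0xFFF0 + BP + SI). */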
6403
6404 /*
6405 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6406 */
6407 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6408 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6409 kIemNativeGstRegUse_ReadOnly);
6410 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6411 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6412 kIemNativeGstRegUse_ReadOnly)
6413 : UINT8_MAX;
6414#ifdef RT_ARCH_AMD64
6415 if (idxRegIndex == UINT8_MAX)
6416 {
6417 if (u16EffAddr == 0)
6418 {
6419 /* movzx ret, base */
6420 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6421 }
6422 else
6423 {
6424 /* lea ret32, [base64 + disp32] */
6425 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6426 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6427 if (idxRegRet >= 8 || idxRegBase >= 8)
6428 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6429 pbCodeBuf[off++] = 0x8d;
6430 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6431 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6432 else
6433 {
6434 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6435 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6436 }
6437 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6438 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6439 pbCodeBuf[off++] = 0;
6440 pbCodeBuf[off++] = 0;
6441 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6442
6443 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6444 }
6445 }
6446 else
6447 {
6448 /* lea ret32, [index64 + base64 (+ disp32)] */
6449 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6450 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6451 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6452 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6453 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6454 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6455 pbCodeBuf[off++] = 0x8d;
6456 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6457 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6458 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6459 if (bMod == X86_MOD_MEM4)
6460 {
6461 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6462 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6463 pbCodeBuf[off++] = 0;
6464 pbCodeBuf[off++] = 0;
6465 }
6466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6467 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6468 }
6469
6470#elif defined(RT_ARCH_ARM64)
6471 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6472 if (u16EffAddr == 0)
6473 {
6474 if (idxRegIndex == UINT8_MAX)
6475 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6476 else
6477 {
6478 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6479 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6480 }
6481 }
6482 else
6483 {
6484 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6485 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6486 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6487 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6488 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6489 else
6490 {
6491 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6492 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6493 }
6494 if (idxRegIndex != UINT8_MAX)
6495 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6496 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6497 }
6498
6499#else
6500# error "port me"
6501#endif
6502
6503 if (idxRegIndex != UINT8_MAX)
6504 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6505 iemNativeRegFreeTmp(pReNative, idxRegBase);
6506 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6507 return off;
6508}
6509
6510
6511#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6512 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6513
6514/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6515 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6516DECL_INLINE_THROW(uint32_t)
6517iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6518 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6519{
6520 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6521
6522 /*
6523 * Handle the disp32 form with no registers first.
6524 *
6525 * Convert to an immediate value, as that'll delay the register allocation
6526 * and assignment till the memory access / call / whatever and we can use
6527 * a more appropriate register (or none at all).
6528 */
6529 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6530 {
6531 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6532 return off;
6533 }
6534
6535 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
6536 uint32_t u32EffAddr = 0;
6537 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6538 {
6539 case 0: break;
6540 case 1: u32EffAddr = (int8_t)u32Disp; break;
6541 case 2: u32EffAddr = u32Disp; break;
6542 default: AssertFailed();
6543 }
6544
6545 /* Get the register (or SIB) value. */
6546 uint8_t idxGstRegBase = UINT8_MAX;
6547 uint8_t idxGstRegIndex = UINT8_MAX;
6548 uint8_t cShiftIndex = 0;
6549 switch (bRm & X86_MODRM_RM_MASK)
6550 {
6551 case 0: idxGstRegBase = X86_GREG_xAX; break;
6552 case 1: idxGstRegBase = X86_GREG_xCX; break;
6553 case 2: idxGstRegBase = X86_GREG_xDX; break;
6554 case 3: idxGstRegBase = X86_GREG_xBX; break;
6555 case 4: /* SIB */
6556 {
6557 /* index w/ scaling. */
6558 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6559 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6560 {
6561 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6562 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6563 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6564 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6565 case 4: cShiftIndex = 0; /*no index*/ break;
6566 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6567 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6568 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6569 }
6570
6571 /* base */
6572 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6573 {
6574 case 0: idxGstRegBase = X86_GREG_xAX; break;
6575 case 1: idxGstRegBase = X86_GREG_xCX; break;
6576 case 2: idxGstRegBase = X86_GREG_xDX; break;
6577 case 3: idxGstRegBase = X86_GREG_xBX; break;
6578 case 4:
6579 idxGstRegBase = X86_GREG_xSP;
6580 u32EffAddr += uSibAndRspOffset >> 8;
6581 break;
6582 case 5:
6583 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6584 idxGstRegBase = X86_GREG_xBP;
6585 else
6586 {
6587 Assert(u32EffAddr == 0);
6588 u32EffAddr = u32Disp;
6589 }
6590 break;
6591 case 6: idxGstRegBase = X86_GREG_xSI; break;
6592 case 7: idxGstRegBase = X86_GREG_xDI; break;
6593 }
6594 break;
6595 }
6596 case 5: idxGstRegBase = X86_GREG_xBP; break;
6597 case 6: idxGstRegBase = X86_GREG_xSI; break;
6598 case 7: idxGstRegBase = X86_GREG_xDI; break;
6599 }
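    /* Worked example (illustrative): "mov eax, [eax+ecx*4+8]" encodes bRm=0x44
       (mod=01, rm=100 -> SIB) and SIB=0x88 (scale=2, index=ECX, base=EAX) with
       u32Disp=8, which decodes to u32EffAddr=8, idxGstRegBase=xAX,
       idxGstRegIndex=xCX and cShiftIndex=2. */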
6600
6601 /*
6602 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6603 * the start of the function.
6604 */
6605 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6606 {
6607 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6608 return off;
6609 }
6610
6611 /*
6612 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6613 */
6614 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6615 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6616 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6617 kIemNativeGstRegUse_ReadOnly);
6618 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6619 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6620 kIemNativeGstRegUse_ReadOnly);
6621
6622 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6623 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6624 {
6625 idxRegBase = idxRegIndex;
6626 idxRegIndex = UINT8_MAX;
6627 }
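    /* E.g. a SIB form with no base (mod=00, base=101), index=EDX and scale=1
       arrives here with cShiftIndex=0; treating EDX as the base lets the plain
       base+disp paths below handle it instead of needing an index-only path. */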
6628
6629#ifdef RT_ARCH_AMD64
6630 if (idxRegIndex == UINT8_MAX)
6631 {
6632 if (u32EffAddr == 0)
6633 {
6634 /* mov ret, base */
6635 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6636 }
6637 else
6638 {
6639 /* lea ret32, [base64 + disp32] */
6640 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6641 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6642 if (idxRegRet >= 8 || idxRegBase >= 8)
6643 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6644 pbCodeBuf[off++] = 0x8d;
6645 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6646 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6647 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6648 else
6649 {
6650 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6651 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6652 }
6653 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6654 if (bMod == X86_MOD_MEM4)
6655 {
6656 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6657 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6658 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6659 }
6660 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6661 }
6662 }
6663 else
6664 {
6665 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6666 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6667 if (idxRegBase == UINT8_MAX)
6668 {
6669 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6670 if (idxRegRet >= 8 || idxRegIndex >= 8)
6671 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6672 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6673 pbCodeBuf[off++] = 0x8d;
6674 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6675 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6676 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6677 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6678 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6679 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6680 }
6681 else
6682 {
6683 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6684 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6685 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6686 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6687 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6688 pbCodeBuf[off++] = 0x8d;
6689 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6690 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6691 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6692 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6693 if (bMod != X86_MOD_MEM0)
6694 {
6695 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6696 if (bMod == X86_MOD_MEM4)
6697 {
6698 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6699 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6700 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6701 }
6702 }
6703 }
6704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6705 }
6706
6707#elif defined(RT_ARCH_ARM64)
6708 if (u32EffAddr == 0)
6709 {
6710 if (idxRegIndex == UINT8_MAX)
6711 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6712 else if (idxRegBase == UINT8_MAX)
6713 {
6714 if (cShiftIndex == 0)
6715 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6716 else
6717 {
6718 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6719 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6720 }
6721 }
6722 else
6723 {
6724 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6725 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6726 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6727 }
6728 }
6729 else
6730 {
6731 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6732 {
6733 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6734 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6735 }
6736 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6737 {
6738 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6739 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6740 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6741 }
6742 else
6743 {
6744 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6745 if (idxRegBase != UINT8_MAX)
6746 {
6747 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6748 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6749 }
6750 }
6751 if (idxRegIndex != UINT8_MAX)
6752 {
6753 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6754 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6755 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6756 }
6757 }
6758
6759#else
6760# error "port me"
6761#endif
6762
6763 if (idxRegIndex != UINT8_MAX)
6764 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6765 if (idxRegBase != UINT8_MAX)
6766 iemNativeRegFreeTmp(pReNative, idxRegBase);
6767 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6768 return off;
6769}
6770
6771
6772#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6773 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6774 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6775
6776#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6777 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6778 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6779
6780#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6781 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6782 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6783
6784/**
6785 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6786 *
6787 * @returns New off.
6788 * @param pReNative The native recompiler state.
6789 * @param off The current code buffer offset.
6790 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6791 * bit 4 to REX.X. The two bits are part of the
6792 * REG sub-field, which isn't needed in this
6793 * function.
6794 * @param uSibAndRspOffset Two parts:
6795 * - The first 8 bits make up the SIB byte.
6796 * - The next 8 bits are the fixed RSP/ESP offset
6797 * in case of a pop [xSP].
6798 * @param u32Disp The displacement byte/word/dword, if any.
6799 * @param cbInstr The size of the fully decoded instruction. Used
6800 * for RIP relative addressing.
6801 * @param idxVarRet The result variable number.
6802 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6803 * when calculating the address.
6804 *
6805 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6806 */
6807DECL_INLINE_THROW(uint32_t)
6808iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6809 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6810{
6811 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6812
6813 /*
6814 * Special case the rip + disp32 form first.
6815 */
6816 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6817 {
6818 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6819 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6820 kIemNativeGstRegUse_ReadOnly);
6821 if (f64Bit)
6822 {
6823#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6824 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6825#else
6826 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6827#endif
6828#ifdef RT_ARCH_AMD64
6829 if ((int32_t)offFinalDisp == offFinalDisp)
6830 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6831 else
6832 {
6833 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6834 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6835 }
6836#else
6837 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6838#endif
6839 }
6840 else
6841 {
6842# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6843 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6844# else
6845 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6846# endif
6847 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6848 }
6849 iemNativeRegFreeTmp(pReNative, idxRegPc);
6850 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6851 return off;
6852 }
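    /* Worked example (illustrative): a 7 byte "mov rax, [rip+disp32]" at guest
       address X addresses X + 7 + disp32, which is why cbInstr is added above.
       With delayed PC updating the accumulated offPc is added too, since the
       cached PC register still holds the value from before the pending advance. */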
6853
6854 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
6855 int64_t i64EffAddr = 0;
6856 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6857 {
6858 case 0: break;
6859 case 1: i64EffAddr = (int8_t)u32Disp; break;
6860 case 2: i64EffAddr = (int32_t)u32Disp; break;
6861 default: AssertFailed();
6862 }
6863
6864 /* Get the register (or SIB) value. */
6865 uint8_t idxGstRegBase = UINT8_MAX;
6866 uint8_t idxGstRegIndex = UINT8_MAX;
6867 uint8_t cShiftIndex = 0;
6868 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6869 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6870 else /* SIB: */
6871 {
6872 /* index w/ scaling. */
6873 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6874 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6875 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6876 if (idxGstRegIndex == 4)
6877 {
6878 /* no index */
6879 cShiftIndex = 0;
6880 idxGstRegIndex = UINT8_MAX;
6881 }
6882
6883 /* base */
6884 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6885 if (idxGstRegBase == 4)
6886 {
6887 /* pop [rsp] hack */
6888 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6889 }
6890 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6891 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6892 {
6893 /* mod=0 and base=5 -> disp32, no base reg. */
6894 Assert(i64EffAddr == 0);
6895 i64EffAddr = (int32_t)u32Disp;
6896 idxGstRegBase = UINT8_MAX;
6897 }
6898 }
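    /* Worked example (illustrative): "mov rax, [r12+r9*8+10h]" has ModRM mod=01,
       rm=100 (SIB) with REX.B and REX.X set (bits 3 and 4 of bRmEx) and SIB=0xcc
       (scale=3, index=001, base=100).  That decodes to idxGstRegIndex=9 (r9),
       idxGstRegBase=12 (r12, so the rsp hack above isn't taken), cShiftIndex=3
       and i64EffAddr=0x10. */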
6899
6900 /*
6901 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6902 * the start of the function.
6903 */
6904 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6905 {
6906 if (f64Bit)
6907 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6908 else
6909 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6910 return off;
6911 }
6912
6913 /*
6914 * Now emit code that calculates:
6915 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6916 * or if !f64Bit:
6917 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6918 */
6919 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6920 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6921 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6922 kIemNativeGstRegUse_ReadOnly);
6923 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6924 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6925 kIemNativeGstRegUse_ReadOnly);
6926
6927 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6928 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6929 {
6930 idxRegBase = idxRegIndex;
6931 idxRegIndex = UINT8_MAX;
6932 }
6933
6934#ifdef RT_ARCH_AMD64
6935 uint8_t bFinalAdj;
6936 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6937 bFinalAdj = 0; /* likely */
6938 else
6939 {
6940 /* pop [rsp] with a problematic disp32 value. Split out the
6941 RSP offset and add it separately afterwards (bFinalAdj). */
6942 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6943 Assert(idxGstRegBase == X86_GREG_xSP);
6944 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6945 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6946 Assert(bFinalAdj != 0);
6947 i64EffAddr -= bFinalAdj;
6948 Assert((int32_t)i64EffAddr == i64EffAddr);
6949 }
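    /* Illustration (assuming the RSP offset is the popped operand size, i.e. 8
       for a 64-bit pop): "pop qword [rsp+7FFFFFF8h]" gives i64EffAddr =
       0x7FFFFFF8 + 8 = 0x80000000, which no longer fits a signed lea disp32.
       The 8 is therefore split back out into bFinalAdj, the lea below uses
       0x7FFFFFF8, and an extra "add ret, 8" is emitted afterwards. */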
6950 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6951//pReNative->pInstrBuf[off++] = 0xcc;
6952
6953 if (idxRegIndex == UINT8_MAX)
6954 {
6955 if (u32EffAddr == 0)
6956 {
6957 /* mov ret, base */
6958 if (f64Bit)
6959 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6960 else
6961 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6962 }
6963 else
6964 {
6965 /* lea ret, [base + disp32] */
6966 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6967 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6968 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6969 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6970 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6971 | (f64Bit ? X86_OP_REX_W : 0);
6972 pbCodeBuf[off++] = 0x8d;
6973 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6974 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6975 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6976 else
6977 {
6978 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6979 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6980 }
6981 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6982 if (bMod == X86_MOD_MEM4)
6983 {
6984 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6985 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6986 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6987 }
6988 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6989 }
6990 }
6991 else
6992 {
6993 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6994 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6995 if (idxRegBase == UINT8_MAX)
6996 {
6997 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6998 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6999 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7000 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7001 | (f64Bit ? X86_OP_REX_W : 0);
7002 pbCodeBuf[off++] = 0x8d;
7003 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7004 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7005 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7006 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7007 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7008 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7009 }
7010 else
7011 {
7012 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7013 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7014 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7015 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7016 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7017 | (f64Bit ? X86_OP_REX_W : 0);
7018 pbCodeBuf[off++] = 0x8d;
7019 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7020 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7021 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7022 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7023 if (bMod != X86_MOD_MEM0)
7024 {
7025 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7026 if (bMod == X86_MOD_MEM4)
7027 {
7028 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7029 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7030 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7031 }
7032 }
7033 }
7034 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7035 }
7036
7037 if (!bFinalAdj)
7038 { /* likely */ }
7039 else
7040 {
7041 Assert(f64Bit);
7042 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7043 }
7044
7045#elif defined(RT_ARCH_ARM64)
7046 if (i64EffAddr == 0)
7047 {
7048 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7049 if (idxRegIndex == UINT8_MAX)
7050 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7051 else if (idxRegBase != UINT8_MAX)
7052 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7053 f64Bit, false /*fSetFlags*/, cShiftIndex);
7054 else
7055 {
7056 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7057 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7058 }
7059 }
7060 else
7061 {
7062 if (f64Bit)
7063 { /* likely */ }
7064 else
7065 i64EffAddr = (int32_t)i64EffAddr;
7066
7067 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7068 {
7069 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7070 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7071 }
7072 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7073 {
7074 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7075 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7076 }
7077 else
7078 {
7079 if (f64Bit)
7080 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7081 else
7082 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7083 if (idxRegBase != UINT8_MAX)
7084 {
7085 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7086 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7087 }
7088 }
7089 if (idxRegIndex != UINT8_MAX)
7090 {
7091 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7092 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7093 f64Bit, false /*fSetFlags*/, cShiftIndex);
7094 }
7095 }
7096
7097#else
7098# error "port me"
7099#endif
7100
7101 if (idxRegIndex != UINT8_MAX)
7102 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7103 if (idxRegBase != UINT8_MAX)
7104 iemNativeRegFreeTmp(pReNative, idxRegBase);
7105 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7106 return off;
7107}
7108
7109
7110/*********************************************************************************************************************************
7111* Memory fetches and stores common *
7112*********************************************************************************************************************************/
7113
7114typedef enum IEMNATIVEMITMEMOP
7115{
7116 kIemNativeEmitMemOp_Store = 0,
7117 kIemNativeEmitMemOp_Fetch,
7118 kIemNativeEmitMemOp_Fetch_Zx_U16,
7119 kIemNativeEmitMemOp_Fetch_Zx_U32,
7120 kIemNativeEmitMemOp_Fetch_Zx_U64,
7121 kIemNativeEmitMemOp_Fetch_Sx_U16,
7122 kIemNativeEmitMemOp_Fetch_Sx_U32,
7123 kIemNativeEmitMemOp_Fetch_Sx_U64
7124} IEMNATIVEMITMEMOP;
7125
7126/** Emits code for IEM_MC_FETCH_MEM_SEG_U8/16/32/64 and IEM_MC_STORE_MEM_SEG_U8/16/32/64,
7127 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7128 * (with iSegReg = UINT8_MAX). */
7129template<uint8_t const a_cbMem, uint32_t const a_fAlignMaskAndCtl, IEMNATIVEMITMEMOP const a_enmOp, bool a_fFlat = false>
7130DECL_INLINE_THROW(uint32_t)
7131iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7132 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7133{
7134 /*
7135 * Assert sanity.
7136 */
7137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7138 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7139 Assert( a_enmOp != kIemNativeEmitMemOp_Store
7140 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7141 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7142 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7143 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7144 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7145 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7146 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7147 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
7148 AssertCompile( a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8
7149 || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U));
7150 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7151 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
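    /* Note on a_fAlignMaskAndCtl (see the strict asserts below): the low 8 bits
       are the address alignment mask (e.g. 3 for a dword access, 15 for an SSE
       aligned 128-bit access), while the IEM_MEMMAP_F_ALIGN_GP and
       IEM_MEMMAP_F_ALIGN_SSE bits control how a misaligned access is treated. */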
7152#ifdef VBOX_STRICT
7153 if (iSegReg == UINT8_MAX)
7154 {
7155 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7156 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7157 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7158 switch (a_cbMem)
7159 {
7160 case 1:
7161 Assert( pfnFunction
7162 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7163 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7164 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7165 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7166 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7167 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7168 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7169 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7170 : UINT64_C(0xc000b000a0009000) ));
7171 Assert(!a_fAlignMaskAndCtl);
7172 break;
7173 case 2:
7174 Assert( pfnFunction
7175 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7176 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7177 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7178 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7179 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7180 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7181 : UINT64_C(0xc000b000a0009000) ));
7182 Assert(a_fAlignMaskAndCtl <= 1);
7183 break;
7184 case 4:
7185 Assert( pfnFunction
7186 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7187 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7188 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7189 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7190 : UINT64_C(0xc000b000a0009000) ));
7191 Assert(a_fAlignMaskAndCtl <= 3);
7192 break;
7193 case 8:
7194 Assert( pfnFunction
7195 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7196 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7197 : UINT64_C(0xc000b000a0009000) ));
7198 Assert(a_fAlignMaskAndCtl <= 7);
7199 break;
7200 case sizeof(RTUINT128U):
7201 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7202 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7203 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7204 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7205 || ( a_enmOp == kIemNativeEmitMemOp_Store
7206 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7207 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7208 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7209 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7210 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7211 : a_fAlignMaskAndCtl <= 15U);
7212 break;
7213 case sizeof(RTUINT256U):
7214 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7215 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7216 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7217 || ( a_enmOp == kIemNativeEmitMemOp_Store
7218 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7219 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7220 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7221 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7222 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7223 : a_fAlignMaskAndCtl <= 31);
7224 break;
7225 }
7226 }
7227 else
7228 {
7229 Assert(iSegReg < 6);
7230 switch (a_cbMem)
7231 {
7232 case 1:
7233 Assert( pfnFunction
7234 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7235 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7236 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7237 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7238 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7239 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7240 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7241 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7242 : UINT64_C(0xc000b000a0009000) ));
7243 Assert(!a_fAlignMaskAndCtl);
7244 break;
7245 case 2:
7246 Assert( pfnFunction
7247 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7248 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7249 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7250 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7251 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7252 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7253 : UINT64_C(0xc000b000a0009000) ));
7254 Assert(a_fAlignMaskAndCtl <= 1);
7255 break;
7256 case 4:
7257 Assert( pfnFunction
7258 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7259 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7260 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7261 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7262 : UINT64_C(0xc000b000a0009000) ));
7263 Assert(a_fAlignMaskAndCtl <= 3);
7264 break;
7265 case 8:
7266 Assert( pfnFunction
7267 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7268 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7269 : UINT64_C(0xc000b000a0009000) ));
7270 Assert(a_fAlignMaskAndCtl <= 7);
7271 break;
7272 case sizeof(RTUINT128U):
7273 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7274 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7275 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7276 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7277 || ( a_enmOp == kIemNativeEmitMemOp_Store
7278 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7279 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7280 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7281 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7282 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7283 : a_fAlignMaskAndCtl <= 15);
7284 break;
7285 case sizeof(RTUINT256U):
7286 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7287 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7288 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7289 || ( a_enmOp == kIemNativeEmitMemOp_Store
7290 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7291 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7292 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7293 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7294 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7295 : a_fAlignMaskAndCtl <= 31);
7296 break;
7297 }
7298 }
7299#endif
7300
7301#ifdef VBOX_STRICT
7302 /*
7303 * Check that the fExec flags we've got make sense.
7304 */
7305 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7306#endif
7307
7308 /*
7309 * To keep things simple we have to commit any pending writes first as we
7310 * may end up making calls.
7311 */
7312 /** @todo we could postpone this till we make the call and reload the
7313 * registers after returning from the call. Not sure if that's sensible or
7314 * not, though. */
7315#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7316 off = iemNativeRegFlushPendingWrites(pReNative, off);
7317#else
7318 /* The program counter is treated differently for now. */
7319 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7320#endif
7321
7322#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7323 /*
7324 * Move/spill/flush stuff out of call-volatile registers.
7325 * This is the easy way out. We could contain this to the tlb-miss branch
7326 * by saving and restoring active stuff here.
7327 */
7328 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7329#endif
7330
7331 /*
7332 * Define labels and allocate the result register (trying for the return
7333 * register if we can).
7334 */
7335 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7336 RT_CONSTEXPR
7337 bool const fSimdRegValues = a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U);
7338 uint8_t const idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7339 : fSimdRegValues
7340 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off)
7341 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7342 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7343 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7344 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem, offDisp);
7345 uint8_t const idxRegValueStore = a_enmOp != kIemNativeEmitMemOp_Store
7346 || TlbState.fSkip
7347 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7348 ? UINT8_MAX
7349 : fSimdRegValues
7350 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7351 : iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off);
7352 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7353 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7354 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7355 : UINT32_MAX;
7356
7357 /*
7358 * Jump to the TLB lookup code.
7359 */
7360 if (!TlbState.fSkip)
7361 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
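    /* Rough shape of the emitted code when the lookup isn't skipped (sketch;
       the exact sequence is produced by the TLB lookup emitter further down):
            jmp     TlbLookup_N         ; emitted just above
        TlbMiss_N:                      ; falls straight after the jump
            ; save volatiles, set up arguments, call pfnFunction, restore
            jmp     TlbDone_N
        TlbLookup_N:
            ; inline TLB probe; on a hit the host address lands in idxRegMemResult
        TlbDone_N:
            ; inline load/store via idxRegMemResult */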
7362
7363 /*
7364 * TlbMiss:
7365 *
7366 * Call helper to do the fetching.
7367 * We flush all guest register shadow copies here.
7368 */
7369 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7370
7371#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7372 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7373#else
7374 RT_NOREF(idxInstr);
7375#endif
7376
7377#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7378 if (pReNative->Core.offPc)
7379 {
7380 /*
7381 * Update the program counter but restore it at the end of the TlbMiss branch.
7382 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7383 * which are hopefully much more frequent, reducing the number of memory accesses.
7384 */
7385 /* Allocate a temporary PC register. */
7386/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7387 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7388 kIemNativeGstRegUse_ForUpdate);
7389
7390 /* Perform the addition and store the result. */
7391 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7392 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7393# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7394 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7395# endif
7396
7397 /* Free and flush the PC register. */
7398 iemNativeRegFreeTmp(pReNative, idxPcReg);
7399 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7400 }
7401#endif
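    /* E.g. with offPc = 5 the TlbMiss path above stores RIP+5 before the helper
       call, so the helper sees the correct instruction pointer if it needs to
       raise an exception, and the code after the call subtracts the 5 again to
       keep the delayed-update bookkeeping consistent. */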
7402
7403#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7404 /* Save variables in volatile registers. */
7405 uint32_t const fHstGprsNotToSave = TlbState.getRegsNotToSave()
7406 | (idxRegMemResult < 32 ? RT_BIT_32(idxRegMemResult) : 0)
7407#if defined(_MSC_VER) /* Workaround for stupid compiler (2019). */ \
7408 || (defined(__clang__) && defined(RT_OS_LINUX))
7409 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch & 0x1f) : 0);
7410#else
7411 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch) : 0);
7412#endif
7413 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstGprsNotToSave);
7414#endif
7415
7416 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7417 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7418 if RT_CONSTEXPR_IF(fSimdRegValues)
7419 {
7420 /*
7421 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7422 *
7423 * Note! A register was assigned to the variable for the TlbLookup case above and
7424 * it must not be freed, or the value will not be synced into that register
7425 * further down the road because the variable wouldn't know it had a register assigned.
7426 *
7427 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7428 * as it will be overwritten anyway.
7429 */
7430 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7431 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7432 a_enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7433 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7434 }
7435 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store)
7436 {
7437 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7438 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7439#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7440 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7441#else
7442 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7443 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7444#endif
7445 }
7446
7447 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7448 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7449#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7450 fVolGregMask);
7451#else
7452 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7453#endif
7454
7455 if RT_CONSTEXPR_IF(!a_fFlat)
7456 {
7457 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7458 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7459 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7460 }
7461
7462#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7463 /* Do delayed EFLAGS calculations. */
7464 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store || fSimdRegValues)
7465 {
7466 if RT_CONSTEXPR_IF(a_fFlat)
7467 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7468 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7469 fHstGprsNotToSave);
7470 else
7471 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7472 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7473 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7474 fHstGprsNotToSave);
7475 }
7476 else if RT_CONSTEXPR_IF(a_fFlat)
7477 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7478 fHstGprsNotToSave);
7479 else
7480 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7481 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7482 fHstGprsNotToSave);
7483#endif
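 /* The template argument to iemNativeDoPostponedEFlagsAtTlbMiss above is presumably a mask of
    call-argument GPRs that already hold live helper arguments at this point (ARG1 = GCPtrMem,
    ARG2/ARG3 = segment index and/or value depending on the variant), so the postponed EFLAGS
    calculation must avoid clobbering them. */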
7484
7485 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7486 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7487
7488 /* Done setting up parameters, make the call. */
7489 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7490
7491 /*
7492 * Put the result in the right register if this is a fetch.
7493 */
7494 if RT_CONSTEXPR_IF(a_enmOp != kIemNativeEmitMemOp_Store)
7495 {
7496 if RT_CONSTEXPR_IF(fSimdRegValues)
7497 {
7498 Assert(a_enmOp == kIemNativeEmitMemOp_Fetch);
7499
7500 /* Sync the value on the stack with the host register assigned to the variable. */
7501 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7502 }
7503 else
7504 {
7505 Assert(idxRegValueFetch == pVarValue->idxReg);
7506 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7507 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7508 }
7509 }
7510
7511#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7512 /* Restore variables and guest shadow registers to volatile registers. */
7513 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstGprsNotToSave);
7514 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7515#endif
7516
7517#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7518 if (pReNative->Core.offPc)
7519 {
7520 /*
7521 * Time to restore the program counter to its original value.
7522 */
7523 /* Allocate a temporary PC register. */
7524 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7525 kIemNativeGstRegUse_ForUpdate);
7526
7527 /* Restore the original value. */
7528 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7529 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7530
7531 /* Free and flush the PC register. */
7532 iemNativeRegFreeTmp(pReNative, idxPcReg);
7533 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7534 }
7535#endif
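 /* The subtraction above undoes the RIP materialization performed before the helper call: the
    recompiler still tracks Core.offPc as pending at this point, so later emitted code (and the
    eventual commit) continues to assume the un-advanced RIP value in CPUMCTX. */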
7536
7537#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7538 if (!TlbState.fSkip)
7539 {
7540 /* end of TlbMiss - Jump to the done label. */
7541 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7542 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7543
7544 /*
7545 * TlbLookup:
7546 */
7547 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl,
7548 a_enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ
7549 >(pReNative, off, &TlbState, iSegReg, idxLabelTlbLookup, idxLabelTlbMiss,
7550 idxRegMemResult, offDisp);
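 /* On the TLB-hit path idxRegMemResult is expected to hold the host mapping of the guest
    location (the lookup code only falls through to the code below on a hit), which is why the
    emitters that follow can use it directly as the base pointer for the load or store. */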
7551
7552 /*
7553 * Emit code to do the actual storing / fetching.
7554 */
7555 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7556# ifdef IEM_WITH_TLB_STATISTICS
7557 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7558 a_enmOp == kIemNativeEmitMemOp_Store
7559 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
7560 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
7561# endif
7562 switch (a_enmOp)
7563 {
7564 case kIemNativeEmitMemOp_Store:
7565 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7566 {
7567 switch (a_cbMem)
7568 {
7569 case 1:
7570 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7571 break;
7572 case 2:
7573 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7574 break;
7575 case 4:
7576 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7577 break;
7578 case 8:
7579 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7580 break;
7581 case sizeof(RTUINT128U):
7582 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7583 break;
7584 case sizeof(RTUINT256U):
7585 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7586 break;
7587 default:
7588 AssertFailed();
7589 }
7590 }
7591 else
7592 {
7593 switch (a_cbMem)
7594 {
7595 case 1:
7596 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7597 idxRegMemResult, TlbState.idxReg1);
7598 break;
7599 case 2:
7600 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7601 idxRegMemResult, TlbState.idxReg1);
7602 break;
7603 case 4:
7604 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7605 idxRegMemResult, TlbState.idxReg1);
7606 break;
7607 case 8:
7608 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7609 idxRegMemResult, TlbState.idxReg1);
7610 break;
7611 default:
7612 AssertFailed();
7613 }
7614 }
7615 break;
7616
7617 case kIemNativeEmitMemOp_Fetch:
7618 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7619 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7620 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7621 switch (a_cbMem)
7622 {
7623 case 1:
7624 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7625 break;
7626 case 2:
7627 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7628 break;
7629 case 4:
7630 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7631 break;
7632 case 8:
7633 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7634 break;
7635 case sizeof(RTUINT128U):
7636 /*
7637 * No need to sync back the register with the stack, this is done by the generic variable handling
7638 * code if there is a register assigned to a variable and the stack must be accessed.
7639 */
7640 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7641 break;
7642 case sizeof(RTUINT256U):
7643 /*
7644 * No need to sync back the register with the stack, this is done by the generic variable handling
7645 * code if there is a register assigned to a variable and the stack must be accessed.
7646 */
7647 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7648 break;
7649 default:
7650 AssertFailed();
7651 }
7652 break;
7653
7654 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7655 Assert(a_cbMem == 1);
7656 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7657 break;
7658
7659 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7660 Assert(a_cbMem == 1 || a_cbMem == 2);
7661 if (a_cbMem == 1)
7662 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7663 else
7664 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7665 break;
7666
7667 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7668 switch (a_cbMem)
7669 {
7670 case 1:
7671 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7672 break;
7673 case 2:
7674 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7675 break;
7676 case 4:
7677 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7678 break;
7679 default:
7680 AssertFailed();
7681 }
7682 break;
7683
7684 default:
7685 AssertFailed();
7686 }
7687
7688 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7689
7690 /*
7691 * TlbDone:
7692 */
7693 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7694
7695 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7696
7697# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7698 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7699 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7700# endif
7701 }
7702#else
7703 RT_NOREF(idxLabelTlbMiss);
7704#endif
7705
7706 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7707 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7708 return off;
7709}
7710
7711
7712
7713/*********************************************************************************************************************************
7714* Memory fetches (IEM_MEM_FETCH_XXX). *
7715*********************************************************************************************************************************/
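/*
 * All the IEM_MC_FETCH_MEM_* wrappers below funnel into iemNativeEmitMemFetchStoreDataCommon.
 * Roughly, the template arguments are: the access size in bytes, the alignment mask-and-control
 * value (low bits = required alignment mask, optionally ORed with IEM_MEMMAP_F_ALIGN_* flags),
 * the operation / extension variant, and an optional trailing bool selecting the flat address
 * space variants (which pass UINT8_MAX instead of a segment register index). For example,
 * IEM_MC_FETCH_MEM_SEG_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) below expands to a 2-byte,
 * naturally aligned, sign-extend-to-64-bit fetch that uses iemNativeHlpMemFetchDataU16_Sx_U64
 * on the TLB-miss path.
 */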
7716
7717/* 8-bit segmented: */
7718#define IEM_MC_FETCH_MEM_SEG_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7719 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch>( \
7720 pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7721
7722#define IEM_MC_FETCH_MEM_SEG_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7723 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16>( \
7724 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7725
7726#define IEM_MC_FETCH_MEM_SEG_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7727 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32>( \
7728 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7729
7730#define IEM_MC_FETCH_MEM_SEG_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7731 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64>( \
7732 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7733
7734#define IEM_MC_FETCH_MEM_SEG_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7735 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16>(\
7736 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7737
7738#define IEM_MC_FETCH_MEM_SEG_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7739 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7740 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7741
7742#define IEM_MC_FETCH_MEM_SEG_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7743 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7744 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7745
7746/* 16-bit segmented: */
7747#define IEM_MC_FETCH_MEM_SEG_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7748 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7749 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7750
7751#define IEM_MC_FETCH_MEM_SEG_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7752 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7753 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7754
7755#define IEM_MC_FETCH_MEM_SEG_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7756 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32>(\
7757 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7758
7759#define IEM_MC_FETCH_MEM_SEG_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7760 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7761 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7762
7763#define IEM_MC_FETCH_MEM_SEG_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7764 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7765 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7766
7767#define IEM_MC_FETCH_MEM_SEG_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7768 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7769 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7770
7771
7772/* 32-bit segmented: */
7773#define IEM_MC_FETCH_MEM_SEG_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7774 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7775 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7776
7777#define IEM_MC_FETCH_MEM_SEG_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7778 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7779 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7780
7781#define IEM_MC_FETCH_MEM_SEG_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7782 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7783 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7784
7785#define IEM_MC_FETCH_MEM_SEG_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7786 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7787 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7788
7789#define IEM_MC_FETCH_MEM_SEG_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7790 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7791 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7792
7793#define IEM_MC_FETCH_MEM_SEG_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7794 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7795 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, \
7796 a_offDisp)
7797
7798#define IEM_MC_FETCH_MEM_SEG_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7799 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7800 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7801
7802#define IEM_MC_FETCH_MEM_SEG_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7803 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7804 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7805
7806#define IEM_MC_FETCH_MEM_SEG_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7807 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7808 pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7809
7810AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7811#define IEM_MC_FETCH_MEM_SEG_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7812 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch>(\
7813 pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7814
7815
7816/* 64-bit segmented: */
7817#define IEM_MC_FETCH_MEM_SEG_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7818 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7819 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7820
7821AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7822#define IEM_MC_FETCH_MEM_SEG_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7823 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch>(\
7824 pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7825
7826
7827/* 8-bit flat: */
7828#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7829 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, true>(\
7830 pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7831
7832#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7833 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, true>(\
7834 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7835
7836#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7837 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7838 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7839
7840#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7841 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7842 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7843
7844#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7845 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, true>(\
7846 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7847
7848#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7849 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7850 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7851
7852#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7853 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7854 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7855
7856
7857/* 16-bit flat: */
7858#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7859 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7860 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7861
7862#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7863 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7864 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7865
7866#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7867 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7868 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7869
7870#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7871 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7872 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7873
7874#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7875 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7876 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7877
7878#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7879 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7880 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7881
7882/* 32-bit flat: */
7883#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7884 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7885 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7886
7887#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7888 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7889 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7890
7891#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7892 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7893 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7894
7895#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7896 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7897 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7898
7899#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7900 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7901 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7902
7903#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7904 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7905 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7906
7907#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7908 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7909 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7910
7911#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7912 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7913 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7914
7915#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7916 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7917 pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7918
7919#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7920 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7921 pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7922
7923
7924/* 64-bit flat: */
7925#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7926 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7927 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7928
7929#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7930 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7931 pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7932
7933
7934/* 128-bit segmented: */
7935#define IEM_MC_FETCH_MEM_SEG_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7936 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7937 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7938
7939#define IEM_MC_FETCH_MEM_SEG_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7940 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7941 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7942 kIemNativeEmitMemOp_Fetch>(\
7943 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
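/* The ALIGN_SSE / ALIGN_AVX variants OR IEM_MEMMAP_F_ALIGN_* control bits into the alignment
   mask; judging by the helper names, this makes a misaligned access raise #GP instead of merely
   falling under the ordinary #AC alignment-check rules. */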
7944
7945AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7946#define IEM_MC_FETCH_MEM_SEG_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7947 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7948 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7949 kIemNativeEmitMemOp_Fetch>(\
7950 pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7951
7952#define IEM_MC_FETCH_MEM_SEG_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7953 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7954 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7955
7956#define IEM_MC_FETCH_MEM_SEG_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7957 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
7958 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7959
7960
7961/* 128-bit flat: */
7962#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7963 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7964 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7965
7966#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7967 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7968 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7969 kIemNativeEmitMemOp_Fetch, true>(\
7970 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7971
7972#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7973 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7974 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7975 kIemNativeEmitMemOp_Fetch, true>(\
7976 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7977
7978#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7979 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7980 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7981
7982#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7983 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7984 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7985
7986/* 256-bit segmented: */
7987#define IEM_MC_FETCH_MEM_SEG_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7988 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7989 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7990
7991#define IEM_MC_FETCH_MEM_SEG_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7992 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7993 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7994
7995#define IEM_MC_FETCH_MEM_SEG_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7996 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
7997 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
7998 kIemNativeEmitMemOp_Fetch>(\
7999 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8000
8001#define IEM_MC_FETCH_MEM_SEG_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8002 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8003 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8004
8005
8006/* 256-bit flat: */
8007#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8008 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8009 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8010
8011#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8012 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8013 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8014
8015#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8016 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8017 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8018 kIemNativeEmitMemOp_Fetch, true>(\
8019 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8020
8021#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8022 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8023 pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8024
8025
8026
8027/*********************************************************************************************************************************
8028* Memory stores (IEM_MEM_STORE_XXX). *
8029*********************************************************************************************************************************/
8030
8031#define IEM_MC_STORE_MEM_SEG_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8032 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store>(\
8033 pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8034
8035#define IEM_MC_STORE_MEM_SEG_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8036 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>(\
8037 pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8038
8039#define IEM_MC_STORE_MEM_SEG_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8040 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store>(\
8041 pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8042
8043#define IEM_MC_STORE_MEM_SEG_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8044 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store>(\
8045 pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8046
8047
8048#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8049 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, true>(\
8050 pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8051
8052#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8053 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8054 pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8055
8056#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8057 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8058 pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8059
8060#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8061 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8062 pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8063
8064
8065#define IEM_MC_STORE_MEM_SEG_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8066 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t)>(\
8067 pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8068
8069#define IEM_MC_STORE_MEM_SEG_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8070 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(\
8071 pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8072
8073#define IEM_MC_STORE_MEM_SEG_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8074 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t)>(\
8075 pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8076
8077#define IEM_MC_STORE_MEM_SEG_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8078 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t)>(\
8079 pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8080
8081
8082#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8083 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t), true>(\
8084 pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8085
8086#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8087 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t), true>(\
8088 pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8089
8090#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8091 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t), true>(\
8092 pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8093
8094#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8095 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t), true>(\
8096 pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8097
8098/** Emits code for IEM_MC_STORE_MEM_SEG_U8/16/32/64_CONST and
8099 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8100template<uint8_t const a_cbMem, bool a_fFlat = false>
8101DECL_INLINE_THROW(uint32_t)
8102iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8103 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
8104{
8105 /*
8106 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8107 * to do the grunt work.
8108 */
8109 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, a_cbMem, uValueConst);
8110 off = iemNativeEmitMemFetchStoreDataCommon<a_cbMem, a_cbMem - 1,
8111 kIemNativeEmitMemOp_Store,
8112 a_fFlat>(pReNative, off, idxVarConstValue, iSegReg,
8113 idxVarGCPtrMem, pfnFunction, idxInstr);
8114 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8115 return off;
8116}
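/* Usage note: e.g. IEM_MC_STORE_MEM_FLAT_U32_CONST(GCPtrMem, 0x1234) above ends up allocating a
   constant variable holding 0x1234 and routing it through the common store path, where the
   TLB-hit code can emit an immediate store (iemNativeEmitStoreImm32ByGprEx) instead of first
   loading the value into a register. */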
8117
8118
8119#define IEM_MC_STORE_MEM_SEG_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8120 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8121 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8122 kIemNativeEmitMemOp_Store>(\
8123 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8124
8125#define IEM_MC_STORE_MEM_SEG_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8126 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store>(\
8127 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8128
8129#define IEM_MC_STORE_MEM_SEG_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8130 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store>(\
8131 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8132
8133#define IEM_MC_STORE_MEM_SEG_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8134 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8135 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8136 kIemNativeEmitMemOp_Store>(\
8137 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8138
8139
8140#define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8141 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8142 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8143 kIemNativeEmitMemOp_Store, true>(\
8144 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, \
8145 pCallEntry->idxInstr)
8146
8147#define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8148 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, true>(\
8149 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8150
8151#define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8152 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, true>(\
8153 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8154
8155#define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8156 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8157 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8158 true>(\
8159 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8160
8161
8162
8163/*********************************************************************************************************************************
8164* Stack Accesses. *
8165*********************************************************************************************************************************/
8166#define IEM_MC_PUSH_U16(a_u16Value) \
8167 off = iemNativeEmitStackPush<16, 0, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8168#define IEM_MC_PUSH_U32(a_u32Value) \
8169 off = iemNativeEmitStackPush<32, 0, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8170#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8171 off = iemNativeEmitStackPush<32, 0, 1>(pReNative, off, a_uSegVal, (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8172#define IEM_MC_PUSH_U64(a_u64Value) \
8173 off = iemNativeEmitStackPush<64, 0, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8174
8175#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8176 off = iemNativeEmitStackPush<16, 32, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8177#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8178 off = iemNativeEmitStackPush<32, 32, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8179#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8180 off = iemNativeEmitStackPush<32, 32, 1>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8181
8182#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8183 off = iemNativeEmitStackPush<16, 64, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8184#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8185 off = iemNativeEmitStackPush<64, 64, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
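/* The template arguments encode <operand width in bits, flat stack width (0 = segmented,
   32/64 = flat), segment-register push>; the strict-build asserts in iemNativeEmitStackPush
   below cross-check that the helper passed in matches this combination. */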
8186
8187
8188/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8189template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat, bool a_fIsSegReg = false>
8190DECL_INLINE_THROW(uint32_t)
8191iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uintptr_t pfnFunction, uint8_t idxInstr)
8192{
8193 /*
8194 * Assert sanity.
8195 */
8196 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
8197 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
8198 AssertCompile(!a_fIsSegReg || a_cBitsVar < 64);
8199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8200 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8201#ifdef VBOX_STRICT
8202 uint32_t const cTmplArgs = RT_MAKE_U32_FROM_U8(a_cBitsVar, a_cBitsFlat, a_fIsSegReg, 0);
8203 if (a_cBitsFlat != 0)
8204 {
8205 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8206 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8207 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8208 Assert( pfnFunction
8209 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8210 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8211 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8212 : cTmplArgs == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8213 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8214 : UINT64_C(0xc000b000a0009000) ));
8215 }
8216 else
8217 Assert( pfnFunction
8218 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8219 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8220 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8221 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8222 : UINT64_C(0xc000b000a0009000) ));
8223#endif
8224
8225#ifdef VBOX_STRICT
8226 /*
8227 * Check that the fExec flags we've got make sense.
8228 */
8229 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8230#endif
8231
8232 /*
8233 * To keep things simple we have to commit any pending writes first as we
8234 * may end up making calls.
8235 */
8236 /** @todo we could postpone this till we make the call and reload the
8237 * registers after returning from the call. Not sure if that's sensible or
8238 * not, though. */
8239 off = iemNativeRegFlushPendingWrites(pReNative, off);
8240
8241 /*
8242 * First we calculate the new RSP and the effective stack pointer value.
8243 * For 64-bit mode and flat 32-bit these two are the same.
8244 * (Code structure is very similar to that of PUSH)
8245 */
8246 RT_CONSTEXPR
8247 uint8_t const cbMem = a_cBitsVar / 8;
8248 bool const fIsIntelSeg = a_fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8249 uint8_t const cbMemAccess = !a_fIsSegReg || !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8250 ? cbMem : sizeof(uint16_t);
8251 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8252 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8253 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8254 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8255 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8256 {
8257 Assert(idxRegEffSp == idxRegRsp);
8258 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8259 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8260 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8261 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8262 else
8263 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8264 }
8265 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8266 {
8267 Assert(idxRegEffSp != idxRegRsp);
8268 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8269 kIemNativeGstRegUse_ReadOnly);
8270#ifdef RT_ARCH_AMD64
8271 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8272#else
8273 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8274#endif
8275 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8276 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8277 offFixupJumpToUseOtherBitSp = off;
8278 if ((pReNative->fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT)
8279 {
8280 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8281 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8282 }
8283 else
8284 {
8285 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8286 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8287 }
8288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8289 }
8290 /* SpUpdateEnd: */
8291 uint32_t const offLabelSpUpdateEnd = off;
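 /* Summary of the above: SS.ATTR.D selects the stack pointer width. For a 32-bit (D=1) stack
    the full ESP is decremented and used as the effective address; for a 16-bit stack only SP
    is decremented (wrapping at 64K) and the effective address is the zero-extended SP. The
    width matching the current CPU mode is emitted inline, the other one out of line in the
    'Use16BitSp' block further down, and both paths join up again here at SpUpdateEnd. */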
8292
8293 /*
8294 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8295 * we're skipping lookup).
8296 */
8297 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8298 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8299 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8300 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8301 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8302 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8303 : UINT32_MAX;
8304 uint8_t const idxRegValue = !TlbState.fSkip
8305 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8306 ? iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarValue, &off,
8307 IEMNATIVE_CALL_ARG2_GREG)
8308 : UINT8_MAX;
8309 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8310
8311
8312 if (!TlbState.fSkip)
8313 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8314 else
8315 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8316
8317 /*
8318 * Use16BitSp:
8319 */
8320 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8321 {
8322#ifdef RT_ARCH_AMD64
8323 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8324#else
8325 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8326#endif
8327 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8328 if ((pReNative->fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT)
8329 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8330 else
8331 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8332 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8333 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8334 }
8335
8336 /*
8337 * TlbMiss:
8338 *
8339 * Call helper to do the pushing.
8340 */
8341 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8342
8343#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8344 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8345#else
8346 RT_NOREF(idxInstr);
8347#endif
8348
8349 /* Save variables in volatile registers. */
8350 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8351 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8352 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8353 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8354 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8355
8356 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8357 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8358 {
8359 /* Swap them using ARG0 as temp register: */
8360 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8361 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8362 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8363 }
8364 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8365 {
8366 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8367 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8368 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8369
8370 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8371 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8372 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8373 }
8374 else
8375 {
8376 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8377 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8378
8379 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8380 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8381 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8382 }
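 /* The three cases above just marshal the helper arguments without clobbering anything: if the
    value already sits in ARG1 while the effective SP must end up there, a direct copy would
    destroy the value, hence the rotation through ARG0 as scratch; otherwise the two loads are
    ordered so that the register loaded first is not an input of the second load. */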
8383
8384#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8385 /* Do delayed EFLAGS calculations. */
8386 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8387 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8388#endif
8389
8390 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8391 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8392
8393 /* Done setting up parameters, make the call. */
8394 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8395
8396 /* Restore variables and guest shadow registers to volatile registers. */
8397 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8398 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8399
8400#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8401 if (!TlbState.fSkip)
8402 {
8403 /* end of TlbMiss - Jump to the done label. */
8404 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8405 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8406
8407 /*
8408 * TlbLookup:
8409 */
8410 if (!a_fIsSegReg || cbMemAccess == cbMem)
8411 {
8412 Assert(cbMemAccess == cbMem);
8413 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState,
8414 iSegReg, idxLabelTlbLookup,
8415 idxLabelTlbMiss, idxRegMemResult);
8416 }
8417 else
8418 {
8419 Assert(cbMemAccess == sizeof(uint16_t));
8420 off = iemNativeEmitTlbLookup<true, sizeof(uint16_t), sizeof(uint16_t) - 1,
8421 IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
8422 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8423 }
8424
8425 /*
8426 * Emit code to do the actual storing / fetching.
8427 */
8428 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8429# ifdef IEM_WITH_TLB_STATISTICS
8430 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8431 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8432# endif
8433 if (idxRegValue != UINT8_MAX)
8434 {
8435 switch (cbMemAccess)
8436 {
8437 case 2:
8438 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8439 break;
8440 case 4:
8441 if (!a_fIsSegReg || !fIsIntelSeg)
8442 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8443 else
8444 {
8445 /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
8446 PUSH FS in real mode, so we have to try to emulate that here.
8447 We borrow the now unused idxReg1 from the TLB lookup code here. */
8448 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8449 kIemNativeGstReg_EFlags);
8450 if (idxRegEfl != UINT8_MAX)
8451 {
8452# ifdef RT_ARCH_AMD64
8453 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8454 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8455 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8456# else
8457 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8458 off, TlbState.idxReg1, idxRegEfl,
8459 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8460# endif
8461 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8462 }
8463 else
8464 {
8465 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, TlbState.idxReg1);
8466 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8467 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8468 }
8469 /* ASSUMES the upper half of idxRegValue is ZERO. */
8470 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8471 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8472 }
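 /* Net effect, as emulated here: the 32-bit value stored for a real-mode PUSH FS on the
    affected Intel CPUs is roughly
    (EFLAGS & UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK) | (selector in bits 15:0),
    relying on the upper half of idxRegValue being zero as ASSUMED above. */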
8473 break;
8474 case 8:
8475 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8476 break;
8477 default:
8478 AssertFailed();
8479 }
8480 }
8481 else
8482 {
8483 switch (cbMemAccess)
8484 {
8485 case 2:
8486 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8487 idxRegMemResult, TlbState.idxReg1);
8488 break;
8489 case 4:
8490 Assert(!a_fIsSegReg);
8491 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8492 idxRegMemResult, TlbState.idxReg1);
8493 break;
8494 case 8:
8495 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8496 break;
8497 default:
8498 AssertFailed();
8499 }
8500 }
8501
8502 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8503 TlbState.freeRegsAndReleaseVars(pReNative);
8504
8505 /*
8506 * TlbDone:
8507 *
8508 * Commit the new RSP value.
8509 */
8510 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8511 }
8512#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8513
8514#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8515 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8516#endif
8517 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8518 if (idxRegEffSp != idxRegRsp)
8519 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8520
8521 /* The value variable is implicitly flushed. */
8522 if (idxRegValue != UINT8_MAX)
8523 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8524 iemNativeVarFreeLocal(pReNative, idxVarValue);
8525
8526 return off;
8527}
8528
8529
8530
8531#define IEM_MC_POP_GREG_U16(a_iGReg) \
8532 off = iemNativeEmitStackPopGReg<16, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8533#define IEM_MC_POP_GREG_U32(a_iGReg) \
8534 off = iemNativeEmitStackPopGReg<32, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8535#define IEM_MC_POP_GREG_U64(a_iGReg) \
8536 off = iemNativeEmitStackPopGReg<64, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8537
8538#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8539 off = iemNativeEmitStackPopGReg<16, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8540#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8541 off = iemNativeEmitStackPopGReg<32, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8542
8543#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8544 off = iemNativeEmitStackPopGReg<16, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8545#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8546 off = iemNativeEmitStackPopGReg<64, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8547
8548
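/**
 * Worked example for the Use16BitSp pop helper below (illustrative only): with
 * RSP=0x0000fffe and cbMem=4, idxRegEffSp ends up as 0x0000fffe (the address the
 * value is read from) while the low word of RSP wraps around to 0x0002, leaving
 * bits 63:16 of RSP untouched - i.e. 16-bit stack pointer semantics.
 */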
8549DECL_FORCE_INLINE_THROW(uint32_t)
8550iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8551 uint8_t idxRegTmp)
8552{
8553 /* Use16BitSp: */
8554#ifdef RT_ARCH_AMD64
8555 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8556 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8557 RT_NOREF(idxRegTmp);
8558#else
8559 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8560 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8561 /* add tmp, regrsp, #cbMem */
8562 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8563 /* and tmp, tmp, #0xffff */
8564 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8565 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8566 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8567 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8568#endif
8569 return off;
8570}
8571
8572
8573DECL_FORCE_INLINE(uint32_t)
8574iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8575{
8576 /* Use32BitSp: */
8577 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8578 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8579 return off;
8580}
8581
8582
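/* Note on the template parameters used below: a_cBitsVar is the operand size being
   popped (16/32/64), while a_cBitsFlat is zero for segmented (non-FLAT) stack
   accesses or the flat address width (32/64); together they select the matching
   iemNativeHlpStack[Flat]FetchUxx TLB-miss helper asserted on in the function. */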
8583/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8584template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
8585DECL_INLINE_THROW(uint32_t)
8586iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg, uintptr_t pfnFunction, uint8_t idxInstr)
8587{
8588 /*
8589 * Assert sanity.
8590 */
8591 Assert(idxGReg < 16);
8592#ifdef VBOX_STRICT
8593 if (a_cBitsFlat != 0)
8594 {
8595 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8596 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8597 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8598 Assert( pfnFunction
8599 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8600 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8601 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8602 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8603 : UINT64_C(0xc000b000a0009000) ));
8604 }
8605 else
8606 Assert( pfnFunction
8607 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8608 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8609 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8610 : UINT64_C(0xc000b000a0009000) ));
8611#endif
8612
8613#ifdef VBOX_STRICT
8614 /*
8615 * Check that the fExec flags we've got make sense.
8616 */
8617 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8618#endif
8619
8620 /*
8621 * To keep things simple we have to commit any pending writes first as we
8622 * may end up making calls.
8623 */
8624 off = iemNativeRegFlushPendingWrites(pReNative, off);
8625
8626 /*
8627 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8628 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8629 * directly as the effective stack pointer.
8630 * (Code structure is very similar to that of PUSH)
8631 */
8632 uint8_t const cbMem = a_cBitsVar / 8;
8633 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8634 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8635 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8636 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8637 * will be the resulting register value. */
8638 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8639
8640 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8641 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8642 {
8643 Assert(idxRegEffSp == idxRegRsp);
8644 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8645 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8646 }
8647 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8648 {
8649 Assert(idxRegEffSp != idxRegRsp);
8650 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8651 kIemNativeGstRegUse_ReadOnly);
8652#ifdef RT_ARCH_AMD64
8653 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8654#else
8655 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8656#endif
8657 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8658 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8659 offFixupJumpToUseOtherBitSp = off;
8660 if ((pReNative->fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT)
8661 {
8662/** @todo can skip idxRegRsp updating when popping ESP. */
8663 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8664 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8665 }
8666 else
8667 {
8668 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8669 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8670 }
8671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8672 }
8673 /* SpUpdateEnd: */
8674 uint32_t const offLabelSpUpdateEnd = off;
8675
8676 /*
8677 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8678 * we're skipping lookup).
8679 */
8680 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8681 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8682 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8683 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8684 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8685 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8686 : UINT32_MAX;
8687
8688 if (!TlbState.fSkip)
8689 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8690 else
8691 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8692
8693 /*
8694 * Use16BitSp:
8695 */
8696 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8697 {
8698#ifdef RT_ARCH_AMD64
8699 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8700#else
8701 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8702#endif
8703 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8704 if ((pReNative->fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT)
8705 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8706 else
8707 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8708 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8709 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8710 }
8711
8712 /*
8713 * TlbMiss:
8714 *
8715 * Call helper to do the fetching.
8716 */
8717 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8718
8719#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8720 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8721#else
8722 RT_NOREF(idxInstr);
8723#endif
8724
8725 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8726 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8727 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8728 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8729
8730
8731 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8732 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8734
8735#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8736 /* Do delayed EFLAGS calculations. */
8737 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8738#endif
8739
8740 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8742
8743 /* Done setting up parameters, make the call. */
8744 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8745
8746 /* Move the return register content to idxRegMemResult. */
8747 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8749
8750 /* Restore variables and guest shadow registers to volatile registers. */
8751 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8752 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8753
8754#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8755 if (!TlbState.fSkip)
8756 {
8757 /* end of TlbMiss - Jump to the done label. */
8758 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8759 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8760
8761 /*
8762 * TlbLookup:
8763 */
8764 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
8765 idxLabelTlbLookup, idxLabelTlbMiss,
8766 idxRegMemResult);
8767
8768 /*
8769 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8770 */
8771 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8772# ifdef IEM_WITH_TLB_STATISTICS
8773 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8774 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8775# endif
8776 switch (cbMem)
8777 {
8778 case 2:
8779 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8780 break;
8781 case 4:
8782 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8783 break;
8784 case 8:
8785 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8786 break;
8787 default:
8788 AssertFailed();
8789 }
8790
8791 TlbState.freeRegsAndReleaseVars(pReNative);
8792
8793 /*
8794 * TlbDone:
8795 *
8796 * Set the new RSP value (FLAT accesses needs to calculate it first) and
8797 * commit the popped register value.
8798 */
8799 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8800 }
8801#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8802
8803 if (idxGReg != X86_GREG_xSP)
8804 {
8805 /* Set the register. */
8806 if (cbMem >= sizeof(uint32_t))
8807 {
8808#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8809 AssertMsg( pReNative->idxCurCall == 0
8810 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8811 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8812 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8813#endif
8814 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8815#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8816 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8817#endif
8818#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8819 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8820 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8821#endif
8822 }
8823 else
8824 {
8825 Assert(cbMem == sizeof(uint16_t));
8826 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8827 kIemNativeGstRegUse_ForUpdate);
8828 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8829#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8831#endif
8832 iemNativeRegFreeTmp(pReNative, idxRegDst);
8833 }
8834
8835 /* Complete RSP calculation for FLAT mode. */
8836 if (idxRegEffSp == idxRegRsp)
8837 {
8838 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8839 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8840 else
8841 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8842 }
8843 }
8844 else
8845 {
8846 /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
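 /* Worked example (illustrative): for a 16-bit 'pop sp' with SS:SP=0x00fe and the
 word 0x1234 at the old top of stack, the final SP must be 0x1234 - the increment
 to 0x0100 is overwritten by the popped value, which is what the 16-bit merge
 below implements. */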
8847 if (cbMem == sizeof(uint64_t))
8848 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8849 else if (cbMem == sizeof(uint32_t))
8850 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8851 else
8852 {
8853 if (idxRegEffSp == idxRegRsp)
8854 {
8855 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8856 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8857 else
8858 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8859 }
8860 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8861 }
8862 }
8863
8864#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8865 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8866#endif
8867
8868 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8869 if (idxRegEffSp != idxRegRsp)
8870 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8871 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8872
8873 return off;
8874}
8875
8876
8877
8878/*********************************************************************************************************************************
8879* Memory mapping (IEM_MC_MEM_SEG_MAP_XXX, IEM_MC_MEM_FLAT_MAP_XXX). *
8880*********************************************************************************************************************************/
8881
8882#define IEM_MC_MEM_SEG_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8883 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/>(\
8884 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8885
8886#define IEM_MC_MEM_SEG_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8887 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/>(\
8888 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8889
8890#define IEM_MC_MEM_SEG_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8891 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/>(\
8892 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8893
8894#define IEM_MC_MEM_SEG_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8895 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/>(\
8896 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8897
8898
8899#define IEM_MC_MEM_SEG_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8900 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8901 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8902
8903#define IEM_MC_MEM_SEG_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8904 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8905 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8906
8907#define IEM_MC_MEM_SEG_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8908 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8909 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8910
8911#define IEM_MC_MEM_SEG_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8912 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8913 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8914
8915#define IEM_MC_MEM_SEG_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8916 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8917 pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8918
8919
8920#define IEM_MC_MEM_SEG_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8921 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8922 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8923
8924#define IEM_MC_MEM_SEG_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8925 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8926 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8927
8928#define IEM_MC_MEM_SEG_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8929 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8930 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8931
8932#define IEM_MC_MEM_SEG_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8933 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8934 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8935
8936#define IEM_MC_MEM_SEG_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8937 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8938 pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8939
8940
8941#define IEM_MC_MEM_SEG_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8942 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8943 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8944
8945#define IEM_MC_MEM_SEG_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8946 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8947 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8948#define IEM_MC_MEM_SEG_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8949 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8950 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8951
8952#define IEM_MC_MEM_SEG_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8953 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8954 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8955
8956#define IEM_MC_MEM_SEG_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8957 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8958 pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8959
8960
8961#define IEM_MC_MEM_SEG_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8962 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8963 pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8964
8965#define IEM_MC_MEM_SEG_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8966 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
8967 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */>(\
8968 pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8969
8970
8971#define IEM_MC_MEM_SEG_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8972 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8973 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8974
8975#define IEM_MC_MEM_SEG_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8976 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8977 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8978
8979#define IEM_MC_MEM_SEG_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8980 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8981 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8982
8983#define IEM_MC_MEM_SEG_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8984 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8985 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8986
8987
8988
8989#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8990 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, true>(\
8991 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8992
8993#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8994 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, true>(\
8995 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8996
8997#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8998 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, true>(\
8999 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9000
9001#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9002 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, true>(\
9003 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9004
9005
9006#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9007 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9008 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9009
9010#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9011 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9012 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9013
9014#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9015 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9016 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9017
9018#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9019 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9020 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9021
9022#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9023 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9024 pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9025
9026
9027#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9028 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9029 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9030
9031#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9032 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9033 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9034
9035#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9036 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9037 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9038
9039#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9040 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9041 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9042
9043#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9044 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9045 pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9046
9047
9048#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9049 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9050 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9051
9052#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9053 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9054 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9055
9056#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9057 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9058 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9059
9060#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9061 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9062 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9063
9064#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9065 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9066 pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9067
9068
9069#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9070 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9071 pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9072
9073#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9074 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9075 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */, true>(\
9076 pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9077
9078
9079#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9080 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9081 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9082
9083#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9084 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9085 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9086
9087#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9088 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9089 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9090
9091#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9092 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9093 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9094
9095
9096template<uint8_t const a_cbMem, uint32_t const a_fAccess, uint32_t const a_fAlignMaskAndCtl, bool a_fFlat = false>
9097DECL_INLINE_THROW(uint32_t)
9098iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9099 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
9100{
9101 /*
9102 * Assert sanity.
9103 */
9104 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9105 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9106 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9107 && pVarMem->cbVar == sizeof(void *),
9108 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9109
9110 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9111 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9112 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9113 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9114 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9115
9116 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9118 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9119 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9120 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9121
9122 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
9123
9124 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9125
9126#ifdef VBOX_STRICT
9127# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9128 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9129 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9130 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9131 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9132# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9133 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9134 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9135 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9136
9137 if RT_CONSTEXPR_IF(a_fFlat)
9138 {
9139 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9140 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9141 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9142 switch (a_cbMem)
9143 {
9144 case 1:
9145 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU8));
9146 Assert(!a_fAlignMaskAndCtl);
9147 break;
9148 case 2:
9149 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU16));
9150 Assert(a_fAlignMaskAndCtl < 2);
9151 break;
9152 case 4:
9153 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU32));
9154 Assert(a_fAlignMaskAndCtl < 4);
9155 break;
9156 case 8:
9157 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU64));
9158 Assert(a_fAlignMaskAndCtl < 8);
9159 break;
9160 case 10:
9161 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9162 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9163 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9164 Assert(a_fAlignMaskAndCtl < 8);
9165 break;
9166 case 16:
9167 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU128));
9168 Assert(a_fAlignMaskAndCtl < 16);
9169 break;
9170# if 0
9171 case 32:
9172 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU256));
9173 Assert(a_fAlignMaskAndCtl < 32);
9174 break;
9175 case 64:
9176 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU512));
9177 Assert(a_fAlignMaskAndCtl < 64);
9178 break;
9179# endif
9180 default: AssertFailed(); break;
9181 }
9182 }
9183 else
9184 {
9185 Assert(iSegReg < 6);
9186 switch (a_cbMem)
9187 {
9188 case 1:
9189 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU8));
9190 Assert(!a_fAlignMaskAndCtl);
9191 break;
9192 case 2:
9193 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU16));
9194 Assert(a_fAlignMaskAndCtl < 2);
9195 break;
9196 case 4:
9197 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU32));
9198 Assert(a_fAlignMaskAndCtl < 4);
9199 break;
9200 case 8:
9201 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU64));
9202 Assert(a_fAlignMaskAndCtl < 8);
9203 break;
9204 case 10:
9205 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9206 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9207 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9208 Assert(a_fAlignMaskAndCtl < 8);
9209 break;
9210 case 16:
9211 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU128));
9212 Assert(a_fAlignMaskAndCtl < 16);
9213 break;
9214# if 0
9215 case 32:
9216 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU256));
9217 Assert(a_fAlignMaskAndCtl < 32);
9218 break;
9219 case 64:
9220 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU512));
9221 Assert(a_fAlignMaskAndCtl < 64);
9222 break;
9223# endif
9224 default: AssertFailed(); break;
9225 }
9226 }
9227# undef IEM_MAP_HLP_FN
9228# undef IEM_MAP_HLP_FN_NO_AT
9229#endif
9230
9231#ifdef VBOX_STRICT
9232 /*
9233 * Check that the fExec flags we've got make sense.
9234 */
9235 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9236#endif
9237
9238 /*
9239 * To keep things simple we have to commit any pending writes first as we
9240 * may end up making calls.
9241 */
9242 off = iemNativeRegFlushPendingWrites(pReNative, off);
9243
9244#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9245 /*
9246 * Move/spill/flush stuff out of call-volatile registers.
9247 * This is the easy way out. We could contain this to the tlb-miss branch
9248 * by saving and restoring active stuff here.
9249 */
9250 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9251 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9252#endif
9253
9254 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9255 while the tlb-miss codepath will temporarily put it on the stack.
9256 Set the type to stack here so we don't need to do it twice below. */
9257 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9258 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9259 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9260 * lookup is done. */
9261
9262 /*
9263 * Define labels and allocate the result register (trying for the return
9264 * register if we can).
9265 */
9266 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9267 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9268 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9269 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9270 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem);
9271 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9272 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9273 : UINT32_MAX;
9274
9275 /*
9276 * Jump to the TLB lookup code.
9277 */
9278 if (!TlbState.fSkip)
9279 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9280
9281 /*
9282 * TlbMiss:
9283 *
9284 * Call helper to do the fetching.
9285 * We flush all guest register shadow copies here.
9286 */
9287 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9288
9289#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9290 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9291#else
9292 RT_NOREF(idxInstr);
9293#endif
9294
9295#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9296 /* Save variables in volatile registers. */
9297 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9298 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9299#endif
9300
9301 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9302 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9303#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9304 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9305#else
9306 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9307#endif
9308
9309 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9310 if RT_CONSTEXPR_IF(!a_fFlat)
9311 {
9312 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9313 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9314 }
9315
9316#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9317 /* Do delayed EFLAGS calculations. */
9318 if RT_CONSTEXPR_IF(a_fFlat)
9319 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9320 fHstRegsNotToSave);
9321 else
9322 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9323 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9324 fHstRegsNotToSave);
9325#endif
9326
9327 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9328 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9329 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9330
9331 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9332 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9333
9334 /* Done setting up parameters, make the call. */
9335 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9336
9337 /*
9338 * Put the output in the right registers.
9339 */
9340 Assert(idxRegMemResult == pVarMem->idxReg);
9341 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9342 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9343
9344#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9345 /* Restore variables and guest shadow registers to volatile registers. */
9346 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9347 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9348#endif
9349
9350 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9351 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9352
9353#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9354 if (!TlbState.fSkip)
9355 {
9356 /* end of TlbMiss - Jump to the done label. */
9357 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9358 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9359
9360 /*
9361 * TlbLookup:
9362 */
9363 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl, a_fAccess>(pReNative, off, &TlbState, iSegReg,
9364 idxLabelTlbLookup, idxLabelTlbMiss,
9365 idxRegMemResult);
9366# ifdef IEM_WITH_TLB_STATISTICS
9367 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9368 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9369# endif
9370
9371 /* [idxVarUnmapInfo] = 0; */
9372 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9373
9374 /*
9375 * TlbDone:
9376 */
9377 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9378
9379 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9380
9381# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9382 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9383 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9384# endif
9385 }
9386#else
9387 RT_NOREF(idxLabelTlbMiss);
9388#endif
9389
9390 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9391 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9392
9393 return off;
9394}
9395
9396
9397#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9398 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, \
9399 pCallEntry->idxInstr, IEM_ACCESS_DATA_ATOMIC)
9400
9401#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9402 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, \
9403 pCallEntry->idxInstr, IEM_ACCESS_DATA_RW)
9404
9405#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9406 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, \
9407 pCallEntry->idxInstr, IEM_ACCESS_DATA_W)
9408
9409#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9410 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, \
9411 pCallEntry->idxInstr, IEM_ACCESS_DATA_R)
9412
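/*
 * Illustrative pairing (a sketch, variable names are just placeholders): the map MCs
 * defined above are expected to be used together with one of the commit MCs below, e.g.:
 *
 *      IEM_MC_MEM_SEG_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEff);
 *      ... modify *pu32Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 *
 * The map MC produces the host pointer and the unmap cookie; the commit MC consumes
 * the cookie and only calls the unmap helper when it is non-zero (TLB miss / bounce
 * buffered access), see iemNativeEmitMemCommitAndUnmap below.
 */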
9413DECL_INLINE_THROW(uint32_t)
9414iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9415 uintptr_t pfnFunction, uint8_t idxInstr, uint32_t fAccess)
9416{
9417 /*
9418 * Assert sanity.
9419 */
9420 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9421#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9422 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9423#endif
9424 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9425 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9426 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9427#ifdef VBOX_STRICT
9428 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9429 {
9430 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9431 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9432 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9433 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9434 case IEM_ACCESS_TYPE_WRITE:
9435 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9436 case IEM_ACCESS_TYPE_READ:
9437 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9438 default: AssertFailed();
9439 }
9440#else
9441 RT_NOREF(fAccess);
9442#endif
9443
9444 /*
9445 * To keep things simple we have to commit any pending writes first as we
9446 * may end up making calls (there shouldn't be any at this point, so this
9447 * is just for consistency).
9448 */
9449 /** @todo we could postpone this till we make the call and reload the
9450 * registers after returning from the call. Not sure if that's sensible or
9451 * not, though. */
9452 off = iemNativeRegFlushPendingWrites(pReNative, off);
9453
9454 /*
9455 * Move/spill/flush stuff out of call-volatile registers.
9456 *
9457 * We exclude any register holding the bUnmapInfo variable, as we'll be
9458 * checking it after returning from the call and will free it afterwards.
9459 */
9460 /** @todo save+restore active registers and maybe guest shadows in miss
9461 * scenario. */
9462 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9463 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9464
9465 /*
9466 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9467 * to call the unmap helper function.
9468 *
9469 * The likelihood of it being zero is higher than for the TLB hit when doing
9470 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9471 * access should also end up with a mapping that won't need special unmapping.
9472 */
9473 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9474 * should speed up things for the pure interpreter as well when TLBs
9475 * are enabled. */
9476#ifdef RT_ARCH_AMD64
9477 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9478 {
9479 /* test byte [rbp - xxx], 0ffh */
9480 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9481 pbCodeBuf[off++] = 0xf6;
9482 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9483 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9484 pbCodeBuf[off++] = 0xff;
9485 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9486 }
9487 else
9488#endif
9489 {
9490 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarUnmapInfo, &off,
9491 IEMNATIVE_CALL_ARG1_GREG);
9492 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9493 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9494 }
9495 uint32_t const offJmpFixup = off;
9496 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9497
9498 /*
9499 * Call the unmap helper function.
9500 */
9501#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9502 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9503#else
9504 RT_NOREF(idxInstr);
9505#endif
9506
9507 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9508 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9509 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9510
9511 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9512 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9513
9514 /* Done setting up parameters, make the call.
9515 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9516 calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9517 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9518
9519 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9520 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9521
9522 /*
9523 * Done, just fixup the jump for the non-call case.
9524 */
9525 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9526
9527 return off;
9528}
9529
9530
9531
9532/*********************************************************************************************************************************
9533* State and Exceptions *
9534*********************************************************************************************************************************/
9535
9536#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9537#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9538
9539#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9540#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9541#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9542
9543#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9544#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9545#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9546
9547
9548DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9549{
9550#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9551 RT_NOREF(pReNative, fForChange);
9552#else
9553 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9554 && fForChange)
9555 {
9556# ifdef RT_ARCH_AMD64
9557
9558 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9559 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9560 {
9561 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9562
9563 /* stmxcsr */
9564 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9565 pbCodeBuf[off++] = X86_OP_REX_B;
9566 pbCodeBuf[off++] = 0x0f;
9567 pbCodeBuf[off++] = 0xae;
9568 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9569 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9570 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9571 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9572 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9574
9575 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9576 }
9577
9578 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9579 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9580 kIemNativeGstRegUse_ReadOnly);
9581
9582 /*
9583 * Mask any exceptions and clear the exception status and save into MXCSR,
9584 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9585 * a register source/target (sigh).
9586 */
9587 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9588 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9589 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9590 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9591
9592 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9593
9594 /* ldmxcsr */
9595 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9596 pbCodeBuf[off++] = X86_OP_REX_B;
9597 pbCodeBuf[off++] = 0x0f;
9598 pbCodeBuf[off++] = 0xae;
9599 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9600 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9601 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9602 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9603 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9604 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9605
9606 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9607 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9608
9609# elif defined(RT_ARCH_ARM64)
9610 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9611
9612 /* Need to save the host floating point control register the first time, clear FPSR. */
9613 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9614 {
9615 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9616 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9617 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9618 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9619 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9620 }
9621
9622 /*
9623 * Translate MXCSR to FPCR.
9624 *
9625 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9626 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9627 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH set.
9628 * We can only use FPCR.FZ, which flushes input _and_ output denormals to zero.
9629 */
9630 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9631 * and implement alternate handling if FEAT_AFP is present. */
9632 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9633 kIemNativeGstRegUse_ReadOnly);
9634
9635 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9636
9637 /* First make sure that nothing is set in the upper 16 bits (X86_MXCSR_MM, which we don't emulate right now). */
9638 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9639
9640 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9641 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9642 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9643 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9644 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9645
9646 /*
9647 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9648 *
9649 * Value MXCSR FPCR
9650 * 0 RN RN
9651 * 1 R- R+
9652 * 2 R+ R-
9653 * 3 RZ RZ
9654 *
9655 * Conversion can be achieved by switching bit positions
9656 */
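 /* Worked example: guest MXCSR.RC = 01b (round towards -infinity) must become
    FPCR.RMode = 10b; swapping the two bit positions below produces exactly that. */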
9657 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9658 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9659 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9660 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9661
9662 /* Write the value to FPCR. */
9663 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9664
9665 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9666 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9667 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9668# else
9669# error "Port me"
9670# endif
9671 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9672 }
9673#endif
9674 return off;
9675}
9676
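/*
 * Summary of the emitted code above: on AMD64 the host MXCSR is saved once per
 * recompiled block (into iem.s.uRegFpCtrl) and a masked copy of the guest MXCSR
 * is then loaded; on ARM64 the host FPCR is saved and FPSR cleared once, after
 * which the guest MXCSR flush-to-zero and rounding-mode bits are translated into
 * an FPCR value and written back.
 */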
9677
9678
9679/*********************************************************************************************************************************
9680* Emitters for FPU related operations. *
9681*********************************************************************************************************************************/
9682
9683#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9684 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9685
9686/** Emits code for IEM_MC_FETCH_FCW. */
9687DECL_INLINE_THROW(uint32_t)
9688iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9689{
9690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9691 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9692
9693 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9694
9695 /* Allocate a temporary FCW register. */
9696 /** @todo eliminate extra register */
9697 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9698 kIemNativeGstRegUse_ReadOnly);
9699
9700 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9701
9702 /* Free but don't flush the FCW register. */
9703 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9704 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9705
9706 return off;
9707}
9708
9709
9710#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9711 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9712
9713/** Emits code for IEM_MC_FETCH_FSW. */
9714DECL_INLINE_THROW(uint32_t)
9715iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9716{
9717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9719
9720 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9721 /* Allocate a temporary FSW register. */
9722 /** @todo eliminate extra register */
9723 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9724 kIemNativeGstRegUse_ReadOnly);
9725
9726 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9727
9728 /* Free but don't flush the FSW register. */
9729 iemNativeRegFreeTmp(pReNative, idxFswReg);
9730 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9731
9732 return off;
9733}
9734
9735
9736
9737/*********************************************************************************************************************************
9738* Emitters for SSE/AVX specific operations. *
9739*********************************************************************************************************************************/
9740
9741#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9742 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9743
9744/** Emits code for IEM_MC_COPY_XREG_U128. */
9745DECL_INLINE_THROW(uint32_t)
9746iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9747{
9748 /* This is a nop if the source and destination register are the same. */
9749 if (iXRegDst != iXRegSrc)
9750 {
9751 /* Allocate destination and source register. */
9752 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9753 kIemNativeGstSimdRegLdStSz_Low128,
9754 kIemNativeGstRegUse_ForFullWrite);
9755 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9756 kIemNativeGstSimdRegLdStSz_Low128,
9757 kIemNativeGstRegUse_ReadOnly);
9758
9759 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9760
9761 /* Free but don't flush the source and destination register. */
9762 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9763 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9764 }
9765
9766 return off;
9767}
9768
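/* Usage note (illustrative, not exhaustive): the register-to-register forms of the
   128-bit moves, e.g. MOVAPS/MOVDQA xmm,xmm, are typical users of IEM_MC_COPY_XREG_U128;
   note that the emitter above produces no code at all when source and destination match. */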
9769
9770#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9771 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9772
9773/** Emits code for IEM_MC_FETCH_XREG_U128. */
9774DECL_INLINE_THROW(uint32_t)
9775iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9776{
9777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9778 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9779
9780 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9781 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9782
9783 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9784
9785 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9786
9787 /* Free but don't flush the source register. */
9788 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9789 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9790
9791 return off;
9792}
9793
9794
9795#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9796 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9797
9798#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9799 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9800
9801 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9802DECL_INLINE_THROW(uint32_t)
9803iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9804{
9805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9807
9808 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9809 kIemNativeGstSimdRegLdStSz_Low128,
9810 kIemNativeGstRegUse_ReadOnly);
9811
9812 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9813 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9814
9815 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9816
9817 /* Free but don't flush the source register. */
9818 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9819 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9820
9821 return off;
9822}
9823
9824
9825#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9826 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9827
9828#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9829 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9830
9831/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9832DECL_INLINE_THROW(uint32_t)
9833iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9834{
9835 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9836 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9837
9838 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9839 kIemNativeGstSimdRegLdStSz_Low128,
9840 kIemNativeGstRegUse_ReadOnly);
9841
9842 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9843 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9844
9845 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9846
9847 /* Free but don't flush the source register. */
9848 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9849 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9850
9851 return off;
9852}
9853
9854
9855#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9856 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9857
9858/** Emits code for IEM_MC_FETCH_XREG_U16. */
9859DECL_INLINE_THROW(uint32_t)
9860iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9861{
9862 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9863 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9864
9865 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9866 kIemNativeGstSimdRegLdStSz_Low128,
9867 kIemNativeGstRegUse_ReadOnly);
9868
9869 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9870 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9871
9872 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9873
9874 /* Free but don't flush the source register. */
9875 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9876 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9877
9878 return off;
9879}
9880
9881
9882#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9883 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9884
9885/** Emits code for IEM_MC_FETCH_XREG_U8. */
9886DECL_INLINE_THROW(uint32_t)
9887iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9888{
9889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9891
9892 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9893 kIemNativeGstSimdRegLdStSz_Low128,
9894 kIemNativeGstRegUse_ReadOnly);
9895
9896 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9897 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9898
9899 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9900
9901 /* Free but don't flush the source register. */
9902 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9903 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9904
9905 return off;
9906}
9907
9908
9909#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9910 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9911
9912AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9913#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9914 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9915
9916
9917/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9918DECL_INLINE_THROW(uint32_t)
9919iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9920{
9921 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9922 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9923
9924 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9925 kIemNativeGstSimdRegLdStSz_Low128,
9926 kIemNativeGstRegUse_ForFullWrite);
9927 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9928
9929 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9930
9931 /* Free but don't flush the source register. */
9932 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9933 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9934
9935 return off;
9936}
9937
9938
9939#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9940 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9941
9942#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9943 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9944
9945#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9946 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9947
9948#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9949 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9950
9951#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9952 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9953
9954#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9955 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9956
9957 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9958DECL_INLINE_THROW(uint32_t)
9959iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9960 uint8_t cbLocal, uint8_t iElem)
9961{
9962 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9963 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9964
9965#ifdef VBOX_STRICT
9966 switch (cbLocal)
9967 {
9968 case sizeof(uint64_t): Assert(iElem < 2); break;
9969 case sizeof(uint32_t): Assert(iElem < 4); break;
9970 case sizeof(uint16_t): Assert(iElem < 8); break;
9971 case sizeof(uint8_t): Assert(iElem < 16); break;
9972 default: AssertFailed();
9973 }
9974#endif
9975
9976 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9977 kIemNativeGstSimdRegLdStSz_Low128,
9978 kIemNativeGstRegUse_ForUpdate);
9979 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
9980
9981 switch (cbLocal)
9982 {
9983 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9984 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9985 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9986 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9987 default: AssertFailed();
9988 }
9989
9990 /* Free but don't flush the source register. */
9991 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9992 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9993
9994 return off;
9995}
9996
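/* Element indexing example (illustrative): IEM_MC_STORE_XREG_U32(a_iXReg, 2, a_u32Value)
   ends up here with cbLocal == sizeof(uint32_t) and iElem == 2, i.e. only dword 2 of the
   XMM register is rewritten; the other elements survive thanks to the ForUpdate allocation. */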
9997
9998#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9999 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10000
10001/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10002DECL_INLINE_THROW(uint32_t)
10003iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10004{
10005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10006 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10007
10008 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10009 kIemNativeGstSimdRegLdStSz_Low128,
10010 kIemNativeGstRegUse_ForUpdate);
10011 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10012
10013 /* Zero the vector register first, then store the 64-bit value into the low 64 bits. */
10014 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10015 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10016
10017 /* Free but don't flush the source register. */
10018 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10019 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10020
10021 return off;
10022}
10023
10024
10025#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10026 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10027
10028/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10029DECL_INLINE_THROW(uint32_t)
10030iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10031{
10032 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10033 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10034
10035 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10036 kIemNativeGstSimdRegLdStSz_Low128,
10037 kIemNativeGstRegUse_ForUpdate);
10038 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10039
10040 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10041 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10042 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10043
10044 /* Free but don't flush the source register. */
10045 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10046 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10047
10048 return off;
10049}
10050
10051
10052#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10053 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10054
10055/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10056DECL_INLINE_THROW(uint32_t)
10057iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10058 uint8_t idxSrcVar, uint8_t iDwSrc)
10059{
10060 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10061 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10062
10063 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10064 kIemNativeGstSimdRegLdStSz_Low128,
10065 kIemNativeGstRegUse_ForUpdate);
10066 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10067
10068 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10069 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10070
10071 /* Free but don't flush the destination register. */
10072 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10073 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10074
10075 return off;
10076}
10077
10078
10079#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10080 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10081
10082/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10083DECL_INLINE_THROW(uint32_t)
10084iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10085{
10086 /*
10087 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10088 * if iYRegDst gets allocated first for the full write it won't load the
10089 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10090 * duplicated from the already allocated host register for iYRegDst containing
10091 * garbage. This will be caught by the guest register value checking in debug
10092 * builds.
10093 */
10094 if (iYRegDst != iYRegSrc)
10095 {
10096 /* Allocate destination and source register. */
10097 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10098 kIemNativeGstSimdRegLdStSz_256,
10099 kIemNativeGstRegUse_ForFullWrite);
10100 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10101 kIemNativeGstSimdRegLdStSz_Low128,
10102 kIemNativeGstRegUse_ReadOnly);
10103
10104 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10105 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10106
10107 /* Free but don't flush the source and destination register. */
10108 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10109 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10110 }
10111 else
10112 {
10113 /* This effectively only clears the upper 128-bits of the register. */
10114 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10115 kIemNativeGstSimdRegLdStSz_High128,
10116 kIemNativeGstRegUse_ForFullWrite);
10117
10118 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10119
10120 /* Free but don't flush the destination register. */
10121 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10122 }
10123
10124 return off;
10125}
10126
10127
10128#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10129 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10130
10131/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10132DECL_INLINE_THROW(uint32_t)
10133iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10134{
10135 /*
10136 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10137 * if iYRegDst gets allocated first for the full write it won't load the
10138 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10139 * duplicated from the already allocated host register for iYRegDst containing
10140 * garbage. This will be catched by the guest register value checking in debug
10141 * builds. iYRegSrc == iYRegDst would effectively only clear any upper 256-bits
10142 * for a zmm register we don't support yet, so this is just a nop.
10143 */
10144 if (iYRegDst != iYRegSrc)
10145 {
10146 /* Allocate destination and source register. */
10147 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10148 kIemNativeGstSimdRegLdStSz_256,
10149 kIemNativeGstRegUse_ReadOnly);
10150 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10151 kIemNativeGstSimdRegLdStSz_256,
10152 kIemNativeGstRegUse_ForFullWrite);
10153
10154 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10155
10156 /* Free but don't flush the source and destination register. */
10157 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10158 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10159 }
10160
10161 return off;
10162}
10163
10164
10165#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10166 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10167
10168/** Emits code for IEM_MC_FETCH_YREG_U128. */
10169DECL_INLINE_THROW(uint32_t)
10170iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10171{
10172 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10173 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10174
10175 Assert(iDQWord <= 1);
10176 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10177 iDQWord == 1
10178 ? kIemNativeGstSimdRegLdStSz_High128
10179 : kIemNativeGstSimdRegLdStSz_Low128,
10180 kIemNativeGstRegUse_ReadOnly);
10181
10182 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10183 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10184
10185 if (iDQWord == 1)
10186 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10187 else
10188 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10189
10190 /* Free but don't flush the source register. */
10191 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10192 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10193
10194 return off;
10195}
10196
10197
10198#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10199 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10200
10201/** Emits code for IEM_MC_FETCH_YREG_U64. */
10202DECL_INLINE_THROW(uint32_t)
10203iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10204{
10205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10207
10208 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10209 iQWord >= 2
10210 ? kIemNativeGstSimdRegLdStSz_High128
10211 : kIemNativeGstSimdRegLdStSz_Low128,
10212 kIemNativeGstRegUse_ReadOnly);
10213
10214 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10215 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10216
10217 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10218
10219 /* Free but don't flush the source register. */
10220 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10221 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10222
10223 return off;
10224}
10225
10226
10227#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10228 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10229
10230/** Emits code for IEM_MC_FETCH_YREG_U32. */
10231DECL_INLINE_THROW(uint32_t)
10232iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10233{
10234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10235 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10236
10237 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10238 iDWord >= 4
10239 ? kIemNativeGstSimdRegLdStSz_High128
10240 : kIemNativeGstSimdRegLdStSz_Low128,
10241 kIemNativeGstRegUse_ReadOnly);
10242
10243 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10244 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10245
10246 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10247
10248 /* Free but don't flush the source register. */
10249 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10250 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10251
10252 return off;
10253}
10254
10255
10256#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10257 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10258
10259/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10260DECL_INLINE_THROW(uint32_t)
10261iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10262{
10263 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10264 kIemNativeGstSimdRegLdStSz_High128,
10265 kIemNativeGstRegUse_ForFullWrite);
10266
10267 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10268
10269 /* Free but don't flush the register. */
10270 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10271
10272 return off;
10273}
10274
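/* Note (illustrative): this building block provides the usual VEX.128 semantics of
   zeroing bits 255:128 of a YMM register; since only the high 128-bit half is allocated
   for a full write, the low half never needs to be loaded from CPUMCTX here. */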
10275
10276#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10277 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10278
10279/** Emits code for IEM_MC_STORE_YREG_U128. */
10280DECL_INLINE_THROW(uint32_t)
10281iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10282{
10283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10284 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10285
10286 Assert(iDQword <= 1);
10287 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10288 iDQword == 0
10289 ? kIemNativeGstSimdRegLdStSz_Low128
10290 : kIemNativeGstSimdRegLdStSz_High128,
10291 kIemNativeGstRegUse_ForFullWrite);
10292
10293 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10294
10295 if (iDQword == 0)
10296 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10297 else
10298 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10299
10300 /* Free but don't flush the source register. */
10301 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10302 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10303
10304 return off;
10305}
10306
10307
10308#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10309 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10310
10311/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10312DECL_INLINE_THROW(uint32_t)
10313iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10314{
10315 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10316 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10317
10318 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10319 kIemNativeGstSimdRegLdStSz_256,
10320 kIemNativeGstRegUse_ForFullWrite);
10321
10322 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10323
10324 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10325 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10326
10327 /* Free but don't flush the source register. */
10328 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10329 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10330
10331 return off;
10332}
10333
10334
10335#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10336 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10337
10338/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10339DECL_INLINE_THROW(uint32_t)
10340iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10341{
10342 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10343 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10344
10345 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10346 kIemNativeGstSimdRegLdStSz_256,
10347 kIemNativeGstRegUse_ForFullWrite);
10348
10349 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10350
10351 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10352 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10353
10354 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10355 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10356
10357 return off;
10358}
10359
10360
10361#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10362 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10363
10364/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10365DECL_INLINE_THROW(uint32_t)
10366iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10367{
10368 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10369 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10370
10371 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10372 kIemNativeGstSimdRegLdStSz_256,
10373 kIemNativeGstRegUse_ForFullWrite);
10374
10375 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10376
10377 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10378 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10379
10380 /* Free but don't flush the source register. */
10381 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10382 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10383
10384 return off;
10385}
10386
10387
10388#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10389 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10390
10391/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10392DECL_INLINE_THROW(uint32_t)
10393iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10394{
10395 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10396 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10397
10398 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10399 kIemNativeGstSimdRegLdStSz_256,
10400 kIemNativeGstRegUse_ForFullWrite);
10401
10402 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10403
10404 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10405 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10406
10407 /* Free but don't flush the source register. */
10408 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10409 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10410
10411 return off;
10412}
10413
10414
10415#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10416 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10417
10418/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10419DECL_INLINE_THROW(uint32_t)
10420iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10421{
10422 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10423 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10424
10425 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10426 kIemNativeGstSimdRegLdStSz_256,
10427 kIemNativeGstRegUse_ForFullWrite);
10428
10429 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10430
10431 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10432 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10433
10434 /* Free but don't flush the source register. */
10435 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10436 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10437
10438 return off;
10439}
10440
10441
10442#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10443 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10444
10445/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10446DECL_INLINE_THROW(uint32_t)
10447iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10448{
10449 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10450 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10451
10452 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10453 kIemNativeGstSimdRegLdStSz_256,
10454 kIemNativeGstRegUse_ForFullWrite);
10455
10456 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10457
10458 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10459
10460 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10461 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10462
10463 return off;
10464}
10465
10466
10467#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10468 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10469
10470/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10471DECL_INLINE_THROW(uint32_t)
10472iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10473{
10474 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10475 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10476
10477 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10478 kIemNativeGstSimdRegLdStSz_256,
10479 kIemNativeGstRegUse_ForFullWrite);
10480
10481 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10482
10483 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10484
10485 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10486 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10487
10488 return off;
10489}
10490
10491
10492#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10493 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10494
10495/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10496DECL_INLINE_THROW(uint32_t)
10497iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10498{
10499 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10500 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10501
10502 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10503 kIemNativeGstSimdRegLdStSz_256,
10504 kIemNativeGstRegUse_ForFullWrite);
10505
10506 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10507
10508 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10509
10510 /* Free but don't flush the source register. */
10511 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10512 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10513
10514 return off;
10515}
10516
10517
10518#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10519 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10520
10521/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10522DECL_INLINE_THROW(uint32_t)
10523iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10524{
10525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10526 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10527
10528 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10529 kIemNativeGstSimdRegLdStSz_256,
10530 kIemNativeGstRegUse_ForFullWrite);
10531
10532 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10533
10534 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10535
10536 /* Free but don't flush the source register. */
10537 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10538 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10539
10540 return off;
10541}
10542
10543
10544#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10545 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10546
10547/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10548DECL_INLINE_THROW(uint32_t)
10549iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10550{
10551 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10552 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10553
10554 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10555 kIemNativeGstSimdRegLdStSz_256,
10556 kIemNativeGstRegUse_ForFullWrite);
10557
10558 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10559
10560 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10561
10562 /* Free but don't flush the source register. */
10563 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10564 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10565
10566 return off;
10567}
10568
10569
10570#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10571 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10572
10573/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10574DECL_INLINE_THROW(uint32_t)
10575iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10576{
10577 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10578 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10579
10580 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10581 kIemNativeGstSimdRegLdStSz_256,
10582 kIemNativeGstRegUse_ForFullWrite);
10583
10584 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10585
10586 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10587 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10588
10589 /* Free but don't flush the source register. */
10590 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10591 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10592
10593 return off;
10594}
10595
10596
10597#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10598 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10599
10600/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10601DECL_INLINE_THROW(uint32_t)
10602iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10603{
10604 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10605 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10606
10607 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10608 kIemNativeGstSimdRegLdStSz_256,
10609 kIemNativeGstRegUse_ForFullWrite);
10610
10611 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10612
10613 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10614 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10615
10616 /* Free but don't flush the source register. */
10617 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10618 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10619
10620 return off;
10621}
10622
10623
10624#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10625 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10626
10627/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10628DECL_INLINE_THROW(uint32_t)
10629iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10630{
10631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10632 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10633
10634 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10635 kIemNativeGstSimdRegLdStSz_256,
10636 kIemNativeGstRegUse_ForFullWrite);
10637 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10638 kIemNativeGstSimdRegLdStSz_Low128,
10639 kIemNativeGstRegUse_ReadOnly);
10640 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10641
10642 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10643 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10644 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10645
10646 /* Free but don't flush the source and destination registers. */
10647 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10648 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10649 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10650
10651 return off;
10652}
10653
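/* Resulting lane layout of a_iYRegDst after the three emits above: qword 0 = a_u64Local,
   qword 1 = qword 1 of a_iYRegSrcHx, qwords 2 and 3 = 0. */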
10654
10655#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10656 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10657
10658/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10659DECL_INLINE_THROW(uint32_t)
10660iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10661{
10662 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10663 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10664
10665 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10666 kIemNativeGstSimdRegLdStSz_256,
10667 kIemNativeGstRegUse_ForFullWrite);
10668 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10669 kIemNativeGstSimdRegLdStSz_Low128,
10670 kIemNativeGstRegUse_ReadOnly);
10671 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10672
10673 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10674 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10675 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10676
10677 /* Free but don't flush the source and destination registers. */
10678 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10679 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10680 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10681
10682 return off;
10683}
10684
10685
10686#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10687 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10688
10689
10690/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10691DECL_INLINE_THROW(uint32_t)
10692iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10693{
10694 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10695 kIemNativeGstSimdRegLdStSz_Low128,
10696 kIemNativeGstRegUse_ForUpdate);
10697
10698 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10699 if (bImm8Mask & RT_BIT(0))
10700 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10701 if (bImm8Mask & RT_BIT(1))
10702 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10703 if (bImm8Mask & RT_BIT(2))
10704 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10705 if (bImm8Mask & RT_BIT(3))
10706 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10707
10708 /* Free but don't flush the destination register. */
10709 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10710
10711 return off;
10712}
10713
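/* Mask example: a_bMask == 0x5 clears dwords 0 and 2 of the XMM register and leaves
   dwords 1 and 3 untouched. */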
10714
10715#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10716 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10717
10718#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10719 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10720
10721/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10722DECL_INLINE_THROW(uint32_t)
10723iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10724{
10725 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10726 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10727
10728 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10729 kIemNativeGstSimdRegLdStSz_256,
10730 kIemNativeGstRegUse_ReadOnly);
10731 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10732
10733 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10734
10735 /* Free but don't flush the source register. */
10736 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10737 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10738
10739 return off;
10740}
10741
10742
10743#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10744 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10745
10746#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10747 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10748
10749/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10750DECL_INLINE_THROW(uint32_t)
10751iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10752{
10753 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10754 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10755
10756 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10757 kIemNativeGstSimdRegLdStSz_256,
10758 kIemNativeGstRegUse_ForFullWrite);
10759 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10760
10761 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10762
10763 /* Free but don't flush the source register. */
10764 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10765 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10766
10767 return off;
10768}
10769
10770
10771#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10772 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10773
10774
10775/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10776DECL_INLINE_THROW(uint32_t)
10777iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10778 uint8_t idxSrcVar, uint8_t iDwSrc)
10779{
10780 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10781 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10782
10783 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10784 iDwDst < 4
10785 ? kIemNativeGstSimdRegLdStSz_Low128
10786 : kIemNativeGstSimdRegLdStSz_High128,
10787 kIemNativeGstRegUse_ForUpdate);
10788 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10789 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10790
10791 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10792 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10793
10794 /* Free but don't flush the source register. */
10795 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10796 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10797 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10798
10799 return off;
10800}
10801
10802
10803#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10804 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10805
10806
10807/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10808DECL_INLINE_THROW(uint32_t)
10809iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10810 uint8_t idxSrcVar, uint8_t iQwSrc)
10811{
10812 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10813 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10814
10815 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10816 iQwDst < 2
10817 ? kIemNativeGstSimdRegLdStSz_Low128
10818 : kIemNativeGstSimdRegLdStSz_High128,
10819 kIemNativeGstRegUse_ForUpdate);
10820 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10821 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10822
10823 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10824 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10825
10826 /* Free but don't flush the source register. */
10827 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10828 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10829 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10830
10831 return off;
10832}
10833
10834
#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)


/** Emits code for IEM_MC_STORE_YREG_U64. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                           iQwDst < 2
                                                                           ? kIemNativeGstSimdRegLdStSz_Low128
                                                                           : kIemNativeGstSimdRegLdStSz_High128,
                                                                           kIemNativeGstRegUse_ForUpdate);

    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);

    /* Free but don't flush the destination register and release the source variable register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)

/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
{
    RT_NOREF(pReNative, iYReg);
    /** @todo Needs to be implemented when support for AVX-512 is added. */
    return off;
}



/*********************************************************************************************************************************
* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX                                                                                        *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
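 *
 * The MXCSR value (with the exception flags masked out) is loaded into the first (hidden) call
 * argument and the helper returns the updated MXCSR; any exception flag raised by the helper
 * that is not masked in MXCSR results in a tail exit to the RaiseSseAvxFpRelated path.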
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
{
    /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));

#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
#endif

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
                                  false /*fFlushPendingWrites*/);

    /*
     * Load the MXCSR register into the first argument and mask out the current exception flags.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);

    /*
     * The updated MXCSR is in the return register; update the exception status flags.
     *
     * The return register is marked allocated as a temporary because it is required for the
     * exception generation check below.
     */
    Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
    uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
    off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);

#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_MxCsr>(pReNative, off, idxRegMxCsr);
#endif

    /*
     * Make sure we don't have any outstanding guest register writes as we may
     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
     */
    off = iemNativeRegFlushPendingWrites(pReNative, off);

#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
#else
    RT_NOREF(idxInstr);
#endif

    /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
     *        want to assume the existence of this instruction at the moment. */
    uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);

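    /* The sequence below computes
           uMxCsrRet & ~((uMxCsrRet & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
       where uMxCsrRet is the helper return value held in idxRegTmp/idxRegTmp2, i.e. it isolates the
       exception flags whose mask bits are clear and exits the TB if any of them are set. */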
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
    /* tmp &= X86_MXCSR_XCPT_MASK */
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
    /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
    /* tmp = ~tmp */
    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
    /* tmp &= mxcsr */
    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
    off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
                                                                                         X86_MXCSR_XCPT_FLAGS);

    iemNativeRegFreeTmp(pReNative, idxRegTmp2);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);

    return off;
}


#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
}


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
                           uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
}


/*********************************************************************************************************************************
* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                        *
*********************************************************************************************************************************/

#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
                           uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
}



/*********************************************************************************************************************************
* Include instruction emitters.                                                                                                 *
*********************************************************************************************************************************/
#include "VMMAll/target-x86/IEMAllN8veEmit-x86.h"
