VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104177

Last change on this file since 104177 was 104177, checked in by vboxsync, 10 months ago

VMM/IEM: Get rid of IEM_MC_STORE_SSE_RESULT() by checking for pending exceptions with IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() before storing the actual result, which can be done with IEM_MC_STORE_XREG_XMM(). This avoids the recompiler emitting code that checks the MXCSR twice. bugref:10641
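
A minimal sketch of the MC-block ordering this commit describes (illustrative only; the operand names iXRegDst and uResult are placeholders and the surrounding statements are elided, not lifted from the sources):

    /* ... the SSE helper computes the result and updates the guest MXCSR ... */
    IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT();  /* raise #XF/#UD now if unmasked MXCSR exceptions are pending */
    IEM_MC_STORE_XREG_XMM(iXRegDst, uResult);         /* only then commit the result; replaces IEM_MC_STORE_SSE_RESULT() */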

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 430.6 KB
1/* $Id: IEMAllN8veRecompFuncs.h 104177 2024-04-05 12:22:54Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value, they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs; see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it probably will drop into the
310 * debugger, so not worth the effort of trying to service it here and we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * immediate in AND operation), we always update the flags and skip the
315 * extra check and associated conditional jump.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
346
347/** The VINF_SUCCESS dummy. */
348template<int const a_rcNormal>
349DECL_FORCE_INLINE(uint32_t)
350iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
351{
352 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
353 if (a_rcNormal != VINF_SUCCESS)
354 {
355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
357#else
358 RT_NOREF_PV(idxInstr);
359#endif
360
361 /* As this code returns from the TB any pending register writes must be flushed. */
362 off = iemNativeRegFlushPendingWrites(pReNative, off);
363
364 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
365 }
366 return off;
367}
368
369
370#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
371 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
373
374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
375 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
377 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
378
379/** Same as iemRegAddToRip64AndFinishingNoFlags. */
380DECL_INLINE_THROW(uint32_t)
381iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
382{
383#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
384# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
385 if (!pReNative->Core.offPc)
386 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
387# endif
388
389 /* Allocate a temporary PC register. */
390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
391
392 /* Perform the addition and store the result. */
393 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
394 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395
396 /* Free but don't flush the PC register. */
397 iemNativeRegFreeTmp(pReNative, idxPcReg);
398#endif
399
400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
401 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
402
403 pReNative->Core.offPc += cbInstr;
404# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
405 off = iemNativePcAdjustCheck(pReNative, off);
406# endif
407 if (pReNative->cCondDepth)
408 off = iemNativeEmitPcWriteback(pReNative, off);
409 else
410 pReNative->Core.cInstrPcUpdateSkipped++;
411#endif
412
413 return off;
414}
415
416
417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
418 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
419 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
420
421#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
422 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
424 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
425
426/** Same as iemRegAddToEip32AndFinishingNoFlags. */
427DECL_INLINE_THROW(uint32_t)
428iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
429{
430#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
431# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
432 if (!pReNative->Core.offPc)
433 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
434# endif
435
436 /* Allocate a temporary PC register. */
437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
438
439 /* Perform the addition and store the result. */
440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
442
443 /* Free but don't flush the PC register. */
444 iemNativeRegFreeTmp(pReNative, idxPcReg);
445#endif
446
447#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
448 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
449
450 pReNative->Core.offPc += cbInstr;
451# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
452 off = iemNativePcAdjustCheck(pReNative, off);
453# endif
454 if (pReNative->cCondDepth)
455 off = iemNativeEmitPcWriteback(pReNative, off);
456 else
457 pReNative->Core.cInstrPcUpdateSkipped++;
458#endif
459
460 return off;
461}
462
463
464#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
465 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
466 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
467
468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
469 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
470 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
471 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
472
473/** Same as iemRegAddToIp16AndFinishingNoFlags. */
474DECL_INLINE_THROW(uint32_t)
475iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
476{
477#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
478# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
479 if (!pReNative->Core.offPc)
480 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
481# endif
482
483 /* Allocate a temporary PC register. */
484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
485
486 /* Perform the addition and store the result. */
487 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
488 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
489 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
490
491 /* Free but don't flush the PC register. */
492 iemNativeRegFreeTmp(pReNative, idxPcReg);
493#endif
494
495#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
496 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
497
498 pReNative->Core.offPc += cbInstr;
499# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
500 off = iemNativePcAdjustCheck(pReNative, off);
501# endif
502 if (pReNative->cCondDepth)
503 off = iemNativeEmitPcWriteback(pReNative, off);
504 else
505 pReNative->Core.cInstrPcUpdateSkipped++;
506#endif
507
508 return off;
509}
510
511
512
513/*********************************************************************************************************************************
514* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
515*********************************************************************************************************************************/
516
517#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
518 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
519 (a_enmEffOpSize), pCallEntry->idxInstr); \
520 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
521
522#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
523 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
524 (a_enmEffOpSize), pCallEntry->idxInstr); \
525 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
526 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
527
528#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
529 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
530 IEMMODE_16BIT, pCallEntry->idxInstr); \
531 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
532
533#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
534 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
535 IEMMODE_16BIT, pCallEntry->idxInstr); \
536 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
537 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
538
539#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
540 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
541 IEMMODE_64BIT, pCallEntry->idxInstr); \
542 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
543
544#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
545 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
546 IEMMODE_64BIT, pCallEntry->idxInstr); \
547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
548 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
549
550/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
551 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
552 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
553DECL_INLINE_THROW(uint32_t)
554iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
555 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
556{
557 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
558
559 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
560 off = iemNativeRegFlushPendingWrites(pReNative, off);
561
562#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
563 Assert(pReNative->Core.offPc == 0);
564
565 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
566#endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition. */
572 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
573
574 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
575 {
576 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
578 }
579 else
580 {
581 /* Just truncate the result to 16-bit IP. */
582 Assert(enmEffOpSize == IEMMODE_16BIT);
583 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
584 }
585 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
586
587 /* Free but don't flush the PC register. */
588 iemNativeRegFreeTmp(pReNative, idxPcReg);
589
590 return off;
591}
592
593
594#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
595 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
596 (a_enmEffOpSize), pCallEntry->idxInstr); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
598
599#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
600 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
601 (a_enmEffOpSize), pCallEntry->idxInstr); \
602 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
603 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
604
605#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
606 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
607 IEMMODE_16BIT, pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
609
610#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
611 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
612 IEMMODE_16BIT, pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
615
616#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
618 IEMMODE_32BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
620
621#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
623 IEMMODE_32BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
626
627/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
628 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
629 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
630DECL_INLINE_THROW(uint32_t)
631iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
632 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
633{
634 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
635
636 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
637 off = iemNativeRegFlushPendingWrites(pReNative, off);
638
639#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
640 Assert(pReNative->Core.offPc == 0);
641
642 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
643#endif
644
645 /* Allocate a temporary PC register. */
646 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
647
648 /* Perform the addition. */
649 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
650
651 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
652 if (enmEffOpSize == IEMMODE_16BIT)
653 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
654
655 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
656/** @todo we can skip this in 32-bit FLAT mode. */
657 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
658
659 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
660
661 /* Free but don't flush the PC register. */
662 iemNativeRegFreeTmp(pReNative, idxPcReg);
663
664 return off;
665}
666
667
668#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
669 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
671
672#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
673 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
674 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
675 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
676
677#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
678 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
680
681#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
682 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
683 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
684 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
685
686#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
687 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
689
690#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
691 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
692 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
693 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
694
695/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
696DECL_INLINE_THROW(uint32_t)
697iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
698 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
699{
700 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
701 off = iemNativeRegFlushPendingWrites(pReNative, off);
702
703#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
704 Assert(pReNative->Core.offPc == 0);
705
706 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
707#endif
708
709 /* Allocate a temporary PC register. */
710 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
711
712 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
713 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
714 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
715 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
716 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
717
718 /* Free but don't flush the PC register. */
719 iemNativeRegFreeTmp(pReNative, idxPcReg);
720
721 return off;
722}
723
724
725
726/*********************************************************************************************************************************
727* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
728*********************************************************************************************************************************/
729
730/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
731#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
732 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
733
734/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
735#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
736 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
737
738/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
739#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
740 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
741
742/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
743 * clears flags. */
744#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
745 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
746 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
747
748/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
749 * clears flags. */
750#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
751 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
752 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
753
754/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
755 * clears flags. */
756#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
757 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
758 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
759
760#undef IEM_MC_SET_RIP_U16_AND_FINISH
761
762
763/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
764#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
765 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
766
767/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
768#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
769 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
770
771/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
772 * clears flags. */
773#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
774 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
775 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
776
777/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
778 * and clears flags. */
779#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
780 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
781 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
782
783#undef IEM_MC_SET_RIP_U32_AND_FINISH
784
785
786/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
787#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
788 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
789
790/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
791 * and clears flags. */
792#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
793 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
794 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
795
796#undef IEM_MC_SET_RIP_U64_AND_FINISH
797
798
799/** Same as iemRegRipJumpU16AndFinishNoFlags,
800 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
801DECL_INLINE_THROW(uint32_t)
802iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
803 uint8_t idxInstr, uint8_t cbVar)
804{
805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
807
808 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
809 off = iemNativeRegFlushPendingWrites(pReNative, off);
810
811#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
812 Assert(pReNative->Core.offPc == 0);
813
814 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
815#endif
816
817 /* Get a register with the new PC loaded from idxVarPc.
818 Note! This ASSUMES that the high bits of the GPR are zeroed. */
819 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
820
821 /* Check limit (may #GP(0) + exit TB). */
822 if (!f64Bit)
823/** @todo we can skip this test in FLAT 32-bit mode. */
824 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
825 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
826 else if (cbVar > sizeof(uint32_t))
827 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
828
829 /* Store the result. */
830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
831
832 iemNativeVarRegisterRelease(pReNative, idxVarPc);
833 /** @todo implicitly free the variable? */
834
835 return off;
836}
837
838
839
840/*********************************************************************************************************************************
841* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
842*********************************************************************************************************************************/
843
844#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
845 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
846
847/**
848 * Emits code to check if a \#NM exception should be raised.
849 *
850 * @returns New code buffer offset, UINT32_MAX on failure.
851 * @param pReNative The native recompile state.
852 * @param off The code buffer offset.
853 * @param idxInstr The current instruction.
854 */
855DECL_INLINE_THROW(uint32_t)
856iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
857{
858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
859 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
860
861 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
862 {
863#endif
864 /*
865 * Make sure we don't have any outstanding guest register writes as we may
866 * raise an #NM and all guest registers must be up to date in CPUMCTX.
867 */
868 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
869 off = iemNativeRegFlushPendingWrites(pReNative, off);
870
871#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
872 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
873#else
874 RT_NOREF(idxInstr);
875#endif
876
877 /* Allocate a temporary CR0 register. */
878 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
879 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
880
881 /*
882 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
883 * return raisexcpt();
884 */
885 /* Test and jump. */
886 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
887
888 /* Free but don't flush the CR0 register. */
889 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
890
891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
893 }
894 else
895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
896#endif
897
898 return off;
899}
900
901
902#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
903 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
904
905/**
906 * Emits code to check if a \#MF exception should be raised.
907 *
908 * @returns New code buffer offset, UINT32_MAX on failure.
909 * @param pReNative The native recompile state.
910 * @param off The code buffer offset.
911 * @param idxInstr The current instruction.
912 */
913DECL_INLINE_THROW(uint32_t)
914iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
915{
916 /*
917 * Make sure we don't have any outstanding guest register writes as we may
918 * raise an #MF and all guest registers must be up to date in CPUMCTX.
919 */
920 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
921 off = iemNativeRegFlushPendingWrites(pReNative, off);
922
923#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
924 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
925#else
926 RT_NOREF(idxInstr);
927#endif
928
929 /* Allocate a temporary FSW register. */
930 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
931 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
932
933 /*
934 * if ((FSW & X86_FSW_ES) != 0)
935 * return raisexcpt();
936 */
937 /* Test and jump. */
938 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
939
940 /* Free but don't flush the FSW register. */
941 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
942
943 return off;
944}
945
946
947#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
948 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
949
950/**
951 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
952 *
953 * @returns New code buffer offset, UINT32_MAX on failure.
954 * @param pReNative The native recompile state.
955 * @param off The code buffer offset.
956 * @param idxInstr The current instruction.
957 */
958DECL_INLINE_THROW(uint32_t)
959iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
960{
961#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
962 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
963
964 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
965 {
966#endif
967 /*
968 * Make sure we don't have any outstanding guest register writes as we may
969 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
970 */
971 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
972 off = iemNativeRegFlushPendingWrites(pReNative, off);
973
974#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
975 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
976#else
977 RT_NOREF(idxInstr);
978#endif
979
980 /* Allocate a temporary CR0 and CR4 register. */
981 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
982 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
983 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
984 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
985
986 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
987#ifdef RT_ARCH_AMD64
988 /*
989 * We do a modified test here:
990 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
991 * else { goto RaiseSseRelated; }
992 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
993 * all targets except the 386, which doesn't support SSE, so this should
994 * be a safe assumption.
995 */
996 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
997 //pCodeBuf[off++] = 0xcc;
998 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
999 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
1000 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
1001 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
1002 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
1003 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
1004
1005#elif defined(RT_ARCH_ARM64)
1006 /*
1007 * We do a modified test here:
1008 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
1009 * else { goto RaiseSseRelated; }
1010 */
1011 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
1012 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1013 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
1014 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
1015 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
1016 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1017 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1018 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
1019 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1020 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1021 idxLabelRaiseSseRelated);
1022
1023#else
1024# error "Port me!"
1025#endif
1026
1027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1028 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1029 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1030 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1031
1032#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1033 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1034 }
1035 else
1036 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1037#endif
1038
1039 return off;
1040}
1041
1042
1043#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1044 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1045
1046/**
1047 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1048 *
1049 * @returns New code buffer offset, UINT32_MAX on failure.
1050 * @param pReNative The native recompile state.
1051 * @param off The code buffer offset.
1052 * @param idxInstr The current instruction.
1053 */
1054DECL_INLINE_THROW(uint32_t)
1055iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1056{
1057#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1058 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1059
1060 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1061 {
1062#endif
1063 /*
1064 * Make sure we don't have any outstanding guest register writes as we may
1065 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1066 */
1067 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1068 off = iemNativeRegFlushPendingWrites(pReNative, off);
1069
1070#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1071 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1072#else
1073 RT_NOREF(idxInstr);
1074#endif
1075
1076 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1077 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1078 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1079 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1080 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1081 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1082
1083 /*
1084 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1085 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1086 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1087 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1088 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1089 * { likely }
1090 * else { goto RaiseAvxRelated; }
1091 */
1092#ifdef RT_ARCH_AMD64
1093 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1094 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1095 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1096 ^ 0x1a) ) { likely }
1097 else { goto RaiseAvxRelated; } */
1098 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1099 //pCodeBuf[off++] = 0xcc;
1100 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1101 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1102 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1103 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1104 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1105 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1106 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1107 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1108 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1109 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1110 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1111
1112#elif defined(RT_ARCH_ARM64)
1113 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1114 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1115 else { goto RaiseAvxRelated; } */
1116 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1117 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1118 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1119 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1120 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1121 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1122 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1123 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1124 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1125 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1126 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1127 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1128 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1129 idxLabelRaiseAvxRelated);
1130
1131#else
1132# error "Port me!"
1133#endif
1134
1135 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1136 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1137 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1138 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1139#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1140 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1141 }
1142 else
1143 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1144#endif
1145
1146 return off;
1147}
1148
1149
1150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1151#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1152 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
1153
1154/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1155DECL_INLINE_THROW(uint32_t)
1156iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1157{
1158 /*
1159 * Make sure we don't have any outstanding guest register writes as we may
1160 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1161 */
1162 off = iemNativeRegFlushPendingWrites(pReNative, off);
1163
1164#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1165 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1166#else
1167 RT_NOREF(idxInstr);
1168#endif
1169
1170 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1171 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1172 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1173
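 /* The sequence below checks for exception flags that are set while their mask bits
    are clear, i.e. roughly:
        if (mxcsr & X86_MXCSR_XCPT_FLAGS & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT))
            goto RaiseSseAvxFpRelated; */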
1174 /* mov tmp, mxcsr */
1175 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1176 /* tmp &= X86_MXCSR_XCPT_MASK */
1177 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1178 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1179 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1180 /* tmp = ~tmp */
1181 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1182 /* tmp &= mxcsr */
1183 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1184 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1185 idxLabelRaiseSseAvxFpRelated);
1186
1187 /* Free but don't flush the MXCSR register. */
1188 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1189 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1190
1191 return off;
1192}
1193#endif
1194
1195
1196#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1197 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1198
1199/**
1200 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
1201 *
1202 * @returns New code buffer offset, UINT32_MAX on failure.
1203 * @param pReNative The native recompile state.
1204 * @param off The code buffer offset.
1205 * @param idxInstr The current instruction.
1206 */
1207DECL_INLINE_THROW(uint32_t)
1208iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1209{
1210 /*
1211 * Make sure we don't have any outstanding guest register writes as we may
1212 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1213 */
1214 off = iemNativeRegFlushPendingWrites(pReNative, off);
1215
1216#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1217 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1218#else
1219 RT_NOREF(idxInstr);
1220#endif
1221
1222 /* Allocate a temporary CR4 register. */
1223 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1224 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1225 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1226
1227 /*
1228 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1229 * return raisexcpt();
1230 */
1231 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
1232
1233 /* raise \#UD exception unconditionally. */
1234 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
1235
1236 /* Free but don't flush the CR4 register. */
1237 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1238
1239 return off;
1240}
1241
1242
1243#define IEM_MC_RAISE_DIVIDE_ERROR() \
1244 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1245
1246/**
1247 * Emits code to raise a \#DE.
1248 *
1249 * @returns New code buffer offset, UINT32_MAX on failure.
1250 * @param pReNative The native recompile state.
1251 * @param off The code buffer offset.
1252 * @param idxInstr The current instruction.
1253 */
1254DECL_INLINE_THROW(uint32_t)
1255iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1256{
1257 /*
1258 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE.
1259 */
1260 off = iemNativeRegFlushPendingWrites(pReNative, off);
1261
1262#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1263 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1264#else
1265 RT_NOREF(idxInstr);
1266#endif
1267
1268 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1269
1270 /* raise \#DE exception unconditionally. */
1271 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1272
1273 return off;
1274}
1275
1276
1277#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
1278 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
1279
1280/**
1281 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
1282 *
1283 * @returns New code buffer offset, UINT32_MAX on failure.
1284 * @param pReNative The native recompile state.
1285 * @param off The code buffer offset.
1286 * @param idxInstr The current instruction.
1287 * @param idxVarEffAddr Index of the variable containing the effective address to check.
1288 * @param cbAlign The alignment in bytes to check against.
1289 */
1290DECL_INLINE_THROW(uint32_t)
1291iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
1292{
1293 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
1294 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
1295
1296 /*
1297 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
1298 */
1299 off = iemNativeRegFlushPendingWrites(pReNative, off);
1300
1301#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1302 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1303#else
1304 RT_NOREF(idxInstr);
1305#endif
1306
1307 uint8_t const idxLabelRaiseGp0 = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0);
1308 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
1309
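 /* The address is unaligned if any of the low bits covered by (cbAlign - 1) are set, so a
    single test-and-jump does the job (cbAlign is assumed to be a power of two here). */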
1310 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxVarReg, cbAlign - 1, idxLabelRaiseGp0);
1311
1312 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
1313 return off;
1314}
1315
1316
1317/*********************************************************************************************************************************
1318* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1319*********************************************************************************************************************************/
1320
1321/**
1322 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1323 *
1324 * @returns Pointer to the condition stack entry on success, NULL on failure
1325 * (too many nestings)
1326 */
1327DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1328{
1329#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1330 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1331#endif
1332
1333 uint32_t const idxStack = pReNative->cCondDepth;
1334 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1335
1336 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1337 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1338
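 /* Create the else/endif labels up front; their offsets are only defined later by the
    IEM_MC_ELSE() and IEM_MC_ENDIF() emitters. */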
1339 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1340 pEntry->fInElse = false;
1341 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1342 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1343
1344 return pEntry;
1345}
1346
1347
1348/**
1349 * Start of the if-block, snapshotting the register and variable state.
1350 */
1351DECL_INLINE_THROW(void)
1352iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1353{
1354 Assert(offIfBlock != UINT32_MAX);
1355 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1356 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1357 Assert(!pEntry->fInElse);
1358
1359 /* Define the start of the IF block if requested or for disassembly purposes. */
1360 if (idxLabelIf != UINT32_MAX)
1361 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1362#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1363 else
1364 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1365#else
1366 RT_NOREF(offIfBlock);
1367#endif
1368
1369#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1370 Assert(pReNative->Core.offPc == 0);
1371#endif
1372
1373 /* Copy the initial state so we can restore it in the 'else' block. */
1374 pEntry->InitialState = pReNative->Core;
1375}
1376
1377
1378#define IEM_MC_ELSE() } while (0); \
1379 off = iemNativeEmitElse(pReNative, off); \
1380 do {
1381
1382/** Emits code related to IEM_MC_ELSE. */
1383DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1384{
1385 /* Check sanity and get the conditional stack entry. */
1386 Assert(off != UINT32_MAX);
1387 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1388 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1389 Assert(!pEntry->fInElse);
1390
1391#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1392 /* Writeback any dirty shadow registers. */
1393 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1394 * in one of the branches and leave guest registers already dirty before the start of the if
1395 * block alone. */
1396 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1397#endif
1398
1399 /* Jump to the endif */
1400 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1401
1402 /* Define the else label and enter the else part of the condition. */
1403 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1404 pEntry->fInElse = true;
1405
1406#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1407 Assert(pReNative->Core.offPc == 0);
1408#endif
1409
1410 /* Snapshot the core state so we can do a merge at the endif and restore
1411 the snapshot we took at the start of the if-block. */
1412 pEntry->IfFinalState = pReNative->Core;
1413 pReNative->Core = pEntry->InitialState;
1414
1415 return off;
1416}
1417
1418
1419#define IEM_MC_ENDIF() } while (0); \
1420 off = iemNativeEmitEndIf(pReNative, off)
1421
1422/** Emits code related to IEM_MC_ENDIF. */
1423DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1424{
1425 /* Check sanity and get the conditional stack entry. */
1426 Assert(off != UINT32_MAX);
1427 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1428 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1429
1430#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1431 Assert(pReNative->Core.offPc == 0);
1432#endif
1433#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1434 /* Writeback any dirty shadow registers (else branch). */
1435 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1436 * in one of the branches and leave guest registers already dirty before the start of the if
1437 * block alone. */
1438 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1439#endif
1440
1441 /*
1442 * Now we have to find common ground with the core state at the end of the
1443 * if-block. Use the smallest common denominator and just drop anything
1444 * that isn't the same in both states.
1445 */
1446 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1447 * which is why we're doing this at the end of the else-block.
1448 * But we'd need more info about the future for that to be worth the effort. */
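 /* In the else-block we reconcile against the state captured at the end of the if-block,
    otherwise (no else) against the state from just before the if-block. */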
1449 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1450#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1451 Assert( pOther->bmGstRegShadowDirty == 0
1452 && pReNative->Core.bmGstRegShadowDirty == 0);
1453#endif
1454
1455 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1456 {
1457 /* shadow guest stuff first. */
1458 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1459 if (fGstRegs)
1460 {
1461 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1462 do
1463 {
1464 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1465 fGstRegs &= ~RT_BIT_64(idxGstReg);
1466
1467 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1468 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1469 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1470 {
1471 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1472 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1473
1474#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1475 /* Writeback any dirty shadow registers we are about to unshadow. */
1476 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1477#endif
1478 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1479 }
1480 } while (fGstRegs);
1481 }
1482 else
1483 {
1484 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1485#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1486 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1487#endif
1488 }
1489
1490 /* Check variables next. For now we must require them to be identical
1491 or something we can recreate. */
1492 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1493 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1494 if (fVars)
1495 {
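 /* Variables that exist in only one of the two states cannot be reconciled and are dropped below. */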
1496 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1497 do
1498 {
1499 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1500 fVars &= ~RT_BIT_32(idxVar);
1501
1502 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1503 {
1504 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1505 continue;
1506 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1507 {
1508 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1509 if (idxHstReg != UINT8_MAX)
1510 {
1511 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1512 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1513 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1514 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1515 }
1516 continue;
1517 }
1518 }
1519 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1520 continue;
1521
1522 /* Irreconcilable, so drop it. */
1523 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1524 if (idxHstReg != UINT8_MAX)
1525 {
1526 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1527 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1528 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1529 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1530 }
1531 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1532 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1533 } while (fVars);
1534 }
1535
1536 /* Finally, check that the host register allocations matches. */
1537 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1538 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1539 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1540 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1541 }
1542
1543 /*
1544 * Define the endif label and maybe the else one if we're still in the 'if' part.
1545 */
1546 if (!pEntry->fInElse)
1547 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1548 else
1549 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1550 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1551
1552 /* Pop the conditional stack. */
1553 pReNative->cCondDepth -= 1;
1554
1555 return off;
1556}
1557
1558
1559#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1560 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1561 do {
1562
1563/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1564DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1565{
1566 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1567 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1568
1569 /* Get the eflags. */
1570 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1571 kIemNativeGstRegUse_ReadOnly);
1572
1573 /* Test and jump. */
1574 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1575
1576 /* Free but don't flush the EFlags register. */
1577 iemNativeRegFreeTmp(pReNative, idxEflReg);
1578
1579 /* Make a copy of the core state now as we start the if-block. */
1580 iemNativeCondStartIfBlock(pReNative, off);
1581
1582 return off;
1583}
1584
1585
1586#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1587 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1588 do {
1589
1590/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1591DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1592{
1593 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1594 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1595
1596 /* Get the eflags. */
1597 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1598 kIemNativeGstRegUse_ReadOnly);
1599
1600 /* Test and jump. */
1601 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1602
1603 /* Free but don't flush the EFlags register. */
1604 iemNativeRegFreeTmp(pReNative, idxEflReg);
1605
1606 /* Make a copy of the core state now as we start the if-block. */
1607 iemNativeCondStartIfBlock(pReNative, off);
1608
1609 return off;
1610}
1611
1612
1613#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1614 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1615 do {
1616
1617/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1618DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1619{
1620 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1621 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1622
1623 /* Get the eflags. */
1624 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1625 kIemNativeGstRegUse_ReadOnly);
1626
1627 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1628 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1629
1630 /* Test and jump. */
1631 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1632
1633 /* Free but don't flush the EFlags register. */
1634 iemNativeRegFreeTmp(pReNative, idxEflReg);
1635
1636 /* Make a copy of the core state now as we start the if-block. */
1637 iemNativeCondStartIfBlock(pReNative, off);
1638
1639 return off;
1640}
1641
1642
1643#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1644 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1645 do {
1646
1647/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1648DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1649{
1650 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1651 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1652
1653 /* Get the eflags. */
1654 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1655 kIemNativeGstRegUse_ReadOnly);
1656
1657 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1658 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1659
1660 /* Test and jump. */
1661 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1662
1663 /* Free but don't flush the EFlags register. */
1664 iemNativeRegFreeTmp(pReNative, idxEflReg);
1665
1666 /* Make a copy of the core state now as we start the if-block. */
1667 iemNativeCondStartIfBlock(pReNative, off);
1668
1669 return off;
1670}
1671
1672
1673#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1674 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1675 do {
1676
1677#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1678 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1679 do {
1680
1681/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1682DECL_INLINE_THROW(uint32_t)
1683iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1684 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1685{
1686 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1687 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1688
1689 /* Get the eflags. */
1690 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1691 kIemNativeGstRegUse_ReadOnly);
1692
1693 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1694 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1695
1696 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1697 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1698 Assert(iBitNo1 != iBitNo2);
1699
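 /* The trick: isolate fBit1, shift it into fBit2's position and XOR it with EFLAGS; bit
    iBitNo2 of the result is then set exactly when the two flags differ. */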
1700#ifdef RT_ARCH_AMD64
1701 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1702
1703 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1704 if (iBitNo1 > iBitNo2)
1705 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1706 else
1707 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1708 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1709
1710#elif defined(RT_ARCH_ARM64)
1711 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1712 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1713
1714 /* and tmpreg, eflreg, #1<<iBitNo1 */
1715 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1716
1717 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1718 if (iBitNo1 > iBitNo2)
1719 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1720 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1721 else
1722 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1723 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1724
1725 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1726
1727#else
1728# error "Port me"
1729#endif
1730
1731 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1732 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1733 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1734
1735 /* Free but don't flush the EFlags and tmp registers. */
1736 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1737 iemNativeRegFreeTmp(pReNative, idxEflReg);
1738
1739 /* Make a copy of the core state now as we start the if-block. */
1740 iemNativeCondStartIfBlock(pReNative, off);
1741
1742 return off;
1743}
1744
1745
1746#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1747 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1748 do {
1749
1750#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1751 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1752 do {
1753
1754/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1755 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1756DECL_INLINE_THROW(uint32_t)
1757iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1758 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1759{
1760 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1761 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1762
1763 /* We need an if-block label for the non-inverted variant. */
1764 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1765 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1766
1767 /* Get the eflags. */
1768 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1769 kIemNativeGstRegUse_ReadOnly);
1770
1771 /* Translate the flag masks to bit numbers. */
1772 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1773 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1774
1775 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1776 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1777 Assert(iBitNo1 != iBitNo);
1778
1779 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1780 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1781 Assert(iBitNo2 != iBitNo);
1782 Assert(iBitNo2 != iBitNo1);
1783
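 /* Same shift-and-XOR trick as iemNativeEmitIfEflagsTwoBitsEqual for the bit pair, with an
    additional up-front test of the lone bit. */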
1784#ifdef RT_ARCH_AMD64
1785 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1786#elif defined(RT_ARCH_ARM64)
1787 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1788#endif
1789
1790 /* Check for the lone bit first. */
1791 if (!fInverted)
1792 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1793 else
1794 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1795
1796 /* Then extract and compare the other two bits. */
1797#ifdef RT_ARCH_AMD64
1798 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1799 if (iBitNo1 > iBitNo2)
1800 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1801 else
1802 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1803 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1804
1805#elif defined(RT_ARCH_ARM64)
1806 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1807
1808 /* and tmpreg, eflreg, #1<<iBitNo1 */
1809 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1810
1811 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1812 if (iBitNo1 > iBitNo2)
1813 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1814 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1815 else
1816 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1817 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1818
1819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1820
1821#else
1822# error "Port me"
1823#endif
1824
1825 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1826 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1827 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1828
1829 /* Free but don't flush the EFlags and tmp registers. */
1830 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1831 iemNativeRegFreeTmp(pReNative, idxEflReg);
1832
1833 /* Make a copy of the core state now as we start the if-block. */
1834 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1835
1836 return off;
1837}
1838
1839
1840#define IEM_MC_IF_CX_IS_NZ() \
1841 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1842 do {
1843
1844/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1845DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1846{
1847 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1848
1849 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1850 kIemNativeGstRegUse_ReadOnly);
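 /* Only the low 16 bits (CX) matter, hence the UINT16_MAX mask in the test below. */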
1851 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1852 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1853
1854 iemNativeCondStartIfBlock(pReNative, off);
1855 return off;
1856}
1857
1858
1859#define IEM_MC_IF_ECX_IS_NZ() \
1860 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1861 do {
1862
1863#define IEM_MC_IF_RCX_IS_NZ() \
1864 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1865 do {
1866
1867/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1868DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1869{
1870 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1871
1872 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1873 kIemNativeGstRegUse_ReadOnly);
1874 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1875 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1876
1877 iemNativeCondStartIfBlock(pReNative, off);
1878 return off;
1879}
1880
1881
1882#define IEM_MC_IF_CX_IS_NOT_ONE() \
1883 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1884 do {
1885
1886/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1887DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1888{
1889 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1890
1891 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1892 kIemNativeGstRegUse_ReadOnly);
1893#ifdef RT_ARCH_AMD64
1894 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1895#else
1896 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1897 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1898 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1899#endif
1900 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1901
1902 iemNativeCondStartIfBlock(pReNative, off);
1903 return off;
1904}
1905
1906
1907#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1908 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1909 do {
1910
1911#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1912 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1913 do {
1914
1915/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1916DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1917{
1918 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1919
1920 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1921 kIemNativeGstRegUse_ReadOnly);
1922 if (f64Bit)
1923 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1924 else
1925 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1926 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1927
1928 iemNativeCondStartIfBlock(pReNative, off);
1929 return off;
1930}
1931
1932
1933#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1934 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1935 do {
1936
1937#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1938 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1939 do {
1940
1941/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1942 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1943DECL_INLINE_THROW(uint32_t)
1944iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1945{
1946 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1947 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1948
1949 /* We have to load both RCX and EFLAGS before we can start branching,
1950 otherwise we'll end up in the else-block with an inconsistent
1951 register allocator state.
1952 Doing EFLAGS first as it's more likely to be loaded, right? */
1953 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1954 kIemNativeGstRegUse_ReadOnly);
1955 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1956 kIemNativeGstRegUse_ReadOnly);
1957
1958 /** @todo we could reduce this to a single branch instruction by spending a
1959 * temporary register and some setnz stuff. Not sure if loops are
1960 * worth it. */
1961 /* Check CX. */
1962#ifdef RT_ARCH_AMD64
1963 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1964#else
1965 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1966 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1967 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1968#endif
1969
1970 /* Check the EFlags bit. */
1971 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1972 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1973 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1974 !fCheckIfSet /*fJmpIfSet*/);
1975
1976 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1977 iemNativeRegFreeTmp(pReNative, idxEflReg);
1978
1979 iemNativeCondStartIfBlock(pReNative, off);
1980 return off;
1981}
1982
1983
1984#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1985 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1986 do {
1987
1988#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1989 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1990 do {
1991
1992#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1993 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1994 do {
1995
1996#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1997 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1998 do {
1999
2000/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
2001 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
2002 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
2003 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2004DECL_INLINE_THROW(uint32_t)
2005iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2006 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
2007{
2008 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2009 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2010
2011 /* We have to load both RCX and EFLAGS before we can start branching,
2012 otherwise we'll end up in the else-block with an inconsistent
2013 register allocator state.
2014 Doing EFLAGS first as it's more likely to be loaded, right? */
2015 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2016 kIemNativeGstRegUse_ReadOnly);
2017 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2018 kIemNativeGstRegUse_ReadOnly);
2019
2020 /** @todo we could reduce this to a single branch instruction by spending a
2021 * temporary register and some setnz stuff. Not sure if loops are
2022 * worth it. */
2023 /* Check RCX/ECX. */
2024 if (f64Bit)
2025 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2026 else
2027 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2028
2029 /* Check the EFlags bit. */
2030 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2031 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2032 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
2033 !fCheckIfSet /*fJmpIfSet*/);
2034
2035 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2036 iemNativeRegFreeTmp(pReNative, idxEflReg);
2037
2038 iemNativeCondStartIfBlock(pReNative, off);
2039 return off;
2040}
2041
2042
2043#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
2044 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
2045 do {
2046
2047/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
2048DECL_INLINE_THROW(uint32_t)
2049iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
2050{
2051 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2052
2053 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
2054 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
2055 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2056 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2057
2058 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
2059
2060 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
2061
2062 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
2063
2064 iemNativeCondStartIfBlock(pReNative, off);
2065 return off;
2066}
2067
2068
2069#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
2070 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
2071 do {
2072
2073/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
2074DECL_INLINE_THROW(uint32_t)
2075iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
2076{
2077 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2078 Assert(iGReg < 16);
2079
2080 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2081 kIemNativeGstRegUse_ReadOnly);
2082
2083 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2084
2085 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2086
2087 iemNativeCondStartIfBlock(pReNative, off);
2088 return off;
2089}
2090
2091
2092#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2093
2094#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
2095 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
2096 do {
2097
2098/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
2099DECL_INLINE_THROW(uint32_t)
2100iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2101{
2102 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2103
2104 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
2105 kIemNativeGstRegUse_Calculation);
2106 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2107
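 /* Like iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt, this tests for exception flags
    whose mask bits are clear, i.e. roughly:
        if (mxcsr & X86_MXCSR_XCPT_FLAGS & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT))
            { enter the if-block } */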
2108 /* mov tmp0, mxcsr */
2109 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2110 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
2111 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
2112 /* mxcsr &= X86_MXCSR_XCPT_MASK */
2113 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
2114 /* mxcsr = ~mxcsr */
2115 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
2116 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
2117 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
2118 /* tmp0 &= mxcsr */
2119 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2120
2121 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
2122 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
2123 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2124
2125 iemNativeCondStartIfBlock(pReNative, off);
2126 return off;
2127}
2128
2129#endif
2130
2131
2132/*********************************************************************************************************************************
2133* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2134*********************************************************************************************************************************/
2135
2136#define IEM_MC_NOREF(a_Name) \
2137 RT_NOREF_PV(a_Name)
2138
2139#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2140 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2141
2142#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2143 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2144
2145#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2146 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2147
2148#define IEM_MC_LOCAL(a_Type, a_Name) \
2149 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2150
2151#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2152 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2153
2154#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2155 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
2156
2157
2158/**
2159 * Sets the host register for @a idxVarRc to @a idxReg.
2160 *
2161 * The register must not be allocated. Any guest register shadowing will be
2162 * implicitly dropped by this call.
2163 *
2164 * The variable must not have any register associated with it (causes
2165 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2166 * implied.
2167 *
2168 * @returns idxReg
2169 * @param pReNative The recompiler state.
2170 * @param idxVar The variable.
2171 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2172 * @param off For recording in debug info.
2173 *
2174 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2175 */
2176DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2177{
2178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2179 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2180 Assert(!pVar->fRegAcquired);
2181 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2182 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2183 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2184
2185 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2186 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2187
2188 iemNativeVarSetKindToStack(pReNative, idxVar);
2189 pVar->idxReg = idxReg;
2190
2191 return idxReg;
2192}
2193
2194
2195/**
2196 * A convenient helper function.
2197 */
2198DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2199 uint8_t idxReg, uint32_t *poff)
2200{
2201 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2202 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2203 return idxReg;
2204}
2205
2206
2207/**
2208 * This is called by IEM_MC_END() to clean up all variables.
2209 */
2210DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2211{
2212 uint32_t const bmVars = pReNative->Core.bmVars;
2213 if (bmVars != 0)
2214 iemNativeVarFreeAllSlow(pReNative, bmVars);
2215 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2216 Assert(pReNative->Core.bmStack == 0);
2217}
2218
2219
2220#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2221
2222/**
2223 * This is called by IEM_MC_FREE_LOCAL.
2224 */
2225DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2226{
2227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2228 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2229 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2230}
2231
2232
2233#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2234
2235/**
2236 * This is called by IEM_MC_FREE_ARG.
2237 */
2238DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2239{
2240 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2241 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2242 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2243}
2244
2245
2246#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2247
2248/**
2249 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2250 */
2251DECL_INLINE_THROW(uint32_t)
2252iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2253{
2254 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2255 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2256 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2257 Assert( pVarDst->cbVar == sizeof(uint16_t)
2258 || pVarDst->cbVar == sizeof(uint32_t));
2259
2260 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2261 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2262 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2263 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2264 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2265
2266 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2267
2268 /*
2269 * Special case for immediates.
2270 */
2271 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2272 {
2273 switch (pVarDst->cbVar)
2274 {
2275 case sizeof(uint16_t):
2276 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2277 break;
2278 case sizeof(uint32_t):
2279 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2280 break;
2281 default: AssertFailed(); break;
2282 }
2283 }
2284 else
2285 {
2286 /*
2287 * The generic solution for now.
2288 */
2289 /** @todo optimize this by having the python script make sure the source
2290 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2291 * statement. Then we could just transfer the register assignments. */
2292 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2293 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2294 switch (pVarDst->cbVar)
2295 {
2296 case sizeof(uint16_t):
2297 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2298 break;
2299 case sizeof(uint32_t):
2300 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2301 break;
2302 default: AssertFailed(); break;
2303 }
2304 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2305 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2306 }
2307 return off;
2308}
2309
2310
2311
2312/*********************************************************************************************************************************
2313* Emitters for IEM_MC_CALL_CIMPL_XXX *
2314*********************************************************************************************************************************/
2315
2316/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2317DECL_INLINE_THROW(uint32_t)
2318iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2319 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2320
2321{
2322 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2323
2324#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2325 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2326 when a call clobbers any of the relevant control registers. */
2327# if 1
2328 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2329 {
2330 /* Likely as long as call+ret are done via cimpl. */
2331 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2332 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2333 }
2334 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2335 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2336 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2337 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2338 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2339 else
2340 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2341 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2342 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2343
2344# else
2345 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2346 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2347 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2348 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2349 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2350 || pfnCImpl == (uintptr_t)iemCImpl_callf
2351 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2352 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2353 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2354 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2355 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2356# endif
2357#endif
2358
2359 /*
2360 * Do all the call setup and cleanup.
2361 */
2362 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2363
2364 /*
2365 * Load the two or three hidden arguments.
2366 */
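 /* With strict VBOXSTRICTRC on Windows/AMD64 the status code is returned via a hidden
    pointer argument, so the shadow stack slot for rcStrict is passed as the first argument
    and pVCpu/cbInstr move to the second and third argument registers. */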
2367#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2368 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2369 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2370 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2371#else
2372 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2373 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2374#endif
2375
2376 /*
2377 * Make the call and check the return code.
2378 *
2379 * Shadow PC copies are always flushed here, other stuff depends on flags.
2380 * Segment and general purpose registers are explicitly flushed via the
2381 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2382 * macros.
2383 */
2384 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2385#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2386 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2387#endif
2388 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2389 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2390 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2391 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2392
2393 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2394}
2395
2396
2397#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2398 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2399
2400/** Emits code for IEM_MC_CALL_CIMPL_1. */
2401DECL_INLINE_THROW(uint32_t)
2402iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2403 uintptr_t pfnCImpl, uint8_t idxArg0)
2404{
2405 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2406 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2407}
2408
2409
2410#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2411 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2412
2413/** Emits code for IEM_MC_CALL_CIMPL_2. */
2414DECL_INLINE_THROW(uint32_t)
2415iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2416 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2417{
2418 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2419 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2420 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2421}
2422
2423
2424#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2425 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2426 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2427
2428/** Emits code for IEM_MC_CALL_CIMPL_3. */
2429DECL_INLINE_THROW(uint32_t)
2430iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2431 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2432{
2433 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2434 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2435 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2436 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2437}
2438
2439
2440#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2441 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2442 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2443
2444/** Emits code for IEM_MC_CALL_CIMPL_4. */
2445DECL_INLINE_THROW(uint32_t)
2446iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2447 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2448{
2449 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2450 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2451 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2452 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2453 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2454}
2455
2456
2457#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2458 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2459 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2460
2461/** Emits code for IEM_MC_CALL_CIMPL_5. */
2462DECL_INLINE_THROW(uint32_t)
2463iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2464 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2465{
2466 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2467 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2468 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2469 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2470 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2471 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2472}
2473
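/*
 * Illustrative sketch of how the wrappers above are meant to be used from a recompiled
 * MC block (the C-impl worker name here is hypothetical); the hidden pVCpu and cbInstr
 * arguments are accounted for via IEM_CIMPL_HIDDEN_ARGS when asserting the argument
 * variable indexes:
 *
 *      IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush,
 *                                   iemCImpl_SomeWorker, a0, a1);
 *
 * which expands to a call to iemNativeEmitCallCImpl2() with a0 and a1 being variables
 * previously registered as arguments 0 and 1.
 */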
2474
2475/** Recompiler debugging: Flush guest register shadow copies. */
2476#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2477
2478
2479
2480/*********************************************************************************************************************************
2481* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2482*********************************************************************************************************************************/
2483
2484/**
2485 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2486 */
2487DECL_INLINE_THROW(uint32_t)
2488iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2489 uintptr_t pfnAImpl, uint8_t cArgs)
2490{
2491 if (idxVarRc != UINT8_MAX)
2492 {
2493 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2494 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2495 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2496 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2497 }
2498
2499 /*
2500 * Do all the call setup and cleanup.
2501 *
2502     * Only the pending guest register writes sitting in call volatile registers need to be
2503     * flushed here, as assembly helpers can't throw and don't access anything living in
2504     * CPUMCTX; they only access their parameters. The call volatile registers are always
2505     * flushed by iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
2506 */
2507 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
2508
2509 /*
2510 * Make the call and update the return code variable if we've got one.
2511 */
2512 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2513 if (idxVarRc != UINT8_MAX)
2514 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2515
2516 return off;
2517}
2518
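/*
 * A rough usage sketch (helper and variable names hypothetical): an MC block calling an
 * assembly helper with a return value would do something like
 *
 *      IEM_MC_LOCAL(uint32_t, fEFlagsRet);
 *      IEM_MC_CALL_AIMPL_2(fEFlagsRet, iemAImpl_SomeHelper, a0, a1);
 *
 * iemNativeEmitCallCommon() loads the arguments into the calling convention registers
 * and flushes whatever lives in call volatile registers, the call itself is emitted,
 * and the a_rc variable is bound directly to IEMNATIVE_CALL_RET_GREG afterwards.
 */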
2519
2520
2521#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2522 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2523
2524#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2525 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2526
2527/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2528DECL_INLINE_THROW(uint32_t)
2529iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2530{
2531 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2532}
2533
2534
2535#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2536 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2537
2538#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2539 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2540
2541/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2542DECL_INLINE_THROW(uint32_t)
2543iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2544{
2545 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2546 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2547}
2548
2549
2550#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2551 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2552
2553#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2554 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2555
2556/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2557DECL_INLINE_THROW(uint32_t)
2558iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2559 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2560{
2561 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2562 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2563 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2564}
2565
2566
2567#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2568 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2569
2570#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2571 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2572
2573/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2574DECL_INLINE_THROW(uint32_t)
2575iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2576 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2577{
2578 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2579 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2580 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2581 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2582}
2583
2584
2585#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2586 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2587
2588#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2589 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2590
2591/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2592DECL_INLINE_THROW(uint32_t)
2593iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2594 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2595{
2596 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2597 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2598 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2599 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2600 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2601}
2602
2603
2604
2605/*********************************************************************************************************************************
2606* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2607*********************************************************************************************************************************/
2608
2609#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2610 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2611
2612#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2613 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2614
2615#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2616 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2617
2618#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2619 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2620
2621
2622/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2623 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2624DECL_INLINE_THROW(uint32_t)
2625iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2626{
2627 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2628 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2629 Assert(iGRegEx < 20);
2630
2631 /* Same discussion as in iemNativeEmitFetchGregU16 */
2632 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2633 kIemNativeGstRegUse_ReadOnly);
2634
2635 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2636 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2637
2638 /* The value is zero-extended to the full 64-bit host register width. */
2639 if (iGRegEx < 16)
2640 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2641 else
2642 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2643
2644 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2645 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2646 return off;
2647}
2648
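/*
 * Note on the extended register encoding used by the 8-bit fetch/store emitters: iGRegEx
 * values 0..15 select the low byte of the corresponding GPR (AL, CL, ..., R15L), while
 * 16..19 select the legacy high-byte registers AH, CH, DH and BH, i.e. bits 15:8 of the
 * first four GPRs. For example, iGRegEx == 16 makes the emitter above copy bits 15:8 of
 * the RAX shadow register (via iemNativeEmitLoadGprFromGpr8Hi) and zero extend them.
 */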
2649
2650#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2651 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2652
2653#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2654 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2655
2656#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2657 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2658
2659/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2660DECL_INLINE_THROW(uint32_t)
2661iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2662{
2663 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2664 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2665 Assert(iGRegEx < 20);
2666
2667 /* Same discussion as in iemNativeEmitFetchGregU16 */
2668 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2669 kIemNativeGstRegUse_ReadOnly);
2670
2671 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2672 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2673
2674 if (iGRegEx < 16)
2675 {
2676 switch (cbSignExtended)
2677 {
2678 case sizeof(uint16_t):
2679 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2680 break;
2681 case sizeof(uint32_t):
2682 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2683 break;
2684 case sizeof(uint64_t):
2685 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2686 break;
2687 default: AssertFailed(); break;
2688 }
2689 }
2690 else
2691 {
2692 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2693 switch (cbSignExtended)
2694 {
2695 case sizeof(uint16_t):
2696 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2697 break;
2698 case sizeof(uint32_t):
2699 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2700 break;
2701 case sizeof(uint64_t):
2702 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2703 break;
2704 default: AssertFailed(); break;
2705 }
2706 }
2707
2708 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2709 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2710 return off;
2711}
2712
2713
2714
2715#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2716 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2717
2718#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2719 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2720
2721#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2722 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2723
2724/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2725DECL_INLINE_THROW(uint32_t)
2726iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2727{
2728 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2729 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2730 Assert(iGReg < 16);
2731
2732 /*
2733     * We can either just load the low 16 bits of the GPR into a host register
2734     * for the variable, or we can do so via a shadow copy host register. The
2735     * latter will avoid having to reload it if it's being stored later, but
2736     * will waste a host register if it isn't touched again. Since we don't
2737     * know what's going to happen, we choose the latter for now.
2738 */
2739 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2740 kIemNativeGstRegUse_ReadOnly);
2741
2742 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2743 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2744 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2745 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2746
2747 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2748 return off;
2749}
2750
2751
2752#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2753 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2754
2755#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2756 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2757
2758/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2759DECL_INLINE_THROW(uint32_t)
2760iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2761{
2762 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2763 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2764 Assert(iGReg < 16);
2765
2766 /*
2767     * We can either just load the low 16 bits of the GPR into a host register
2768     * for the variable, or we can do so via a shadow copy host register. The
2769     * latter will avoid having to reload it if it's being stored later, but
2770     * will waste a host register if it isn't touched again. Since we don't
2771     * know what's going to happen, we choose the latter for now.
2772 */
2773 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2774 kIemNativeGstRegUse_ReadOnly);
2775
2776 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2777 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2778 if (cbSignExtended == sizeof(uint32_t))
2779 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2780 else
2781 {
2782 Assert(cbSignExtended == sizeof(uint64_t));
2783 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2784 }
2785 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2786
2787 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2788 return off;
2789}
2790
2791
2792#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2793 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2794
2795#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2796 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2797
2798/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2799DECL_INLINE_THROW(uint32_t)
2800iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2801{
2802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2803 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2804 Assert(iGReg < 16);
2805
2806 /*
2807     * We can either just load the low 32 bits of the GPR into a host register
2808     * for the variable, or we can do so via a shadow copy host register. The
2809     * latter will avoid having to reload it if it's being stored later, but
2810     * will waste a host register if it isn't touched again. Since we don't
2811     * know what's going to happen, we choose the latter for now.
2812 */
2813 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2814 kIemNativeGstRegUse_ReadOnly);
2815
2816 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2817 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2818 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2819 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2820
2821 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2822 return off;
2823}
2824
2825
2826#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2827 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2828
2829/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2830DECL_INLINE_THROW(uint32_t)
2831iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2832{
2833 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2834 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2835 Assert(iGReg < 16);
2836
2837 /*
2838     * We can either just load the low 32 bits of the GPR into a host register
2839     * for the variable, or we can do so via a shadow copy host register. The
2840     * latter will avoid having to reload it if it's being stored later, but
2841     * will waste a host register if it isn't touched again. Since we don't
2842     * know what's going to happen, we choose the latter for now.
2843 */
2844 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2845 kIemNativeGstRegUse_ReadOnly);
2846
2847 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2848 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2849 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2850 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2851
2852 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2853 return off;
2854}
2855
2856
2857#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2858 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2859
2860#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2861 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2862
2863/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2864 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2865DECL_INLINE_THROW(uint32_t)
2866iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2867{
2868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2869 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2870 Assert(iGReg < 16);
2871
2872 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2873 kIemNativeGstRegUse_ReadOnly);
2874
2875 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2876 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2877 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2878 /** @todo name the register a shadow one already? */
2879 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2880
2881 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2882 return off;
2883}
2884
2885
2886#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2887#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
2888 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
2889
2890/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
2891DECL_INLINE_THROW(uint32_t)
2892iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
2893{
2894 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2895 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
2896 Assert(iGRegLo < 16 && iGRegHi < 16);
2897
2898 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
2899 kIemNativeGstRegUse_ReadOnly);
2900 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
2901 kIemNativeGstRegUse_ReadOnly);
2902
2903 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2904 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
2905 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
2906 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
2907
2908 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
2909 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
2910 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
2911 return off;
2912}
2913#endif
2914
2915
2916/*********************************************************************************************************************************
2917* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2918*********************************************************************************************************************************/
2919
2920#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2921 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2922
2923/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2924DECL_INLINE_THROW(uint32_t)
2925iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2926{
2927 Assert(iGRegEx < 20);
2928 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2929 kIemNativeGstRegUse_ForUpdate);
2930#ifdef RT_ARCH_AMD64
2931 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2932
2933 /* To the lowest byte of the register: mov r8, imm8 */
2934 if (iGRegEx < 16)
2935 {
2936 if (idxGstTmpReg >= 8)
2937 pbCodeBuf[off++] = X86_OP_REX_B;
2938 else if (idxGstTmpReg >= 4)
2939 pbCodeBuf[off++] = X86_OP_REX;
2940 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2941 pbCodeBuf[off++] = u8Value;
2942 }
2943    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
2944 else if (idxGstTmpReg < 4)
2945 {
2946 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2947 pbCodeBuf[off++] = u8Value;
2948 }
2949 else
2950 {
2951 /* ror reg64, 8 */
2952 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2953 pbCodeBuf[off++] = 0xc1;
2954 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2955 pbCodeBuf[off++] = 8;
2956
2957 /* mov reg8, imm8 */
2958 if (idxGstTmpReg >= 8)
2959 pbCodeBuf[off++] = X86_OP_REX_B;
2960 else if (idxGstTmpReg >= 4)
2961 pbCodeBuf[off++] = X86_OP_REX;
2962 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2963 pbCodeBuf[off++] = u8Value;
2964
2965 /* rol reg64, 8 */
2966 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2967 pbCodeBuf[off++] = 0xc1;
2968 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2969 pbCodeBuf[off++] = 8;
2970 }
2971
2972#elif defined(RT_ARCH_ARM64)
2973 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2974 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2975 if (iGRegEx < 16)
2976 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2977 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2978 else
2979 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2980 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2981 iemNativeRegFreeTmp(pReNative, idxImmReg);
2982
2983#else
2984# error "Port me!"
2985#endif
2986
2987 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2988
2989#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2990 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2991#endif
2992
2993 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2994 return off;
2995}
2996
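/*
 * A concrete example of the rotate trick above, with a hypothetical register assignment:
 * if the guest RCX value is shadowed in host register r10 and 0x42 is stored into CH
 * (iGRegEx == 17), the emitted AMD64 sequence is roughly
 *
 *      ror r10, 8          ; bring bits 15:8 down into the low byte
 *      mov r10b, 0x42      ; overwrite just that byte
 *      rol r10, 8          ; restore the original bit layout
 *
 * leaving all other bits of the 64-bit shadow register untouched.
 */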
2997
2998#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2999 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
3000
3001/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
3002DECL_INLINE_THROW(uint32_t)
3003iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
3004{
3005 Assert(iGRegEx < 20);
3006 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3007
3008 /*
3009     * If it's a constant value (unlikely), we treat this as an
3010 * IEM_MC_STORE_GREG_U8_CONST statement.
3011 */
3012 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3013 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3014 { /* likely */ }
3015 else
3016 {
3017 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3018 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3019 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
3020 }
3021
3022 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3023 kIemNativeGstRegUse_ForUpdate);
3024 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3025
3026#ifdef RT_ARCH_AMD64
3027 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
3028 if (iGRegEx < 16)
3029 {
3030 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3031 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3032 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3033 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3034 pbCodeBuf[off++] = X86_OP_REX;
3035 pbCodeBuf[off++] = 0x8a;
3036 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3037 }
3038    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
3039 else if (idxGstTmpReg < 4 && idxVarReg < 4)
3040 {
3041 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
3042 pbCodeBuf[off++] = 0x8a;
3043 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
3044 }
3045 else
3046 {
3047 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
3048
3049 /* ror reg64, 8 */
3050 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3051 pbCodeBuf[off++] = 0xc1;
3052 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3053 pbCodeBuf[off++] = 8;
3054
3055 /* mov reg8, reg8(r/m) */
3056 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3057 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3058 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3059 pbCodeBuf[off++] = X86_OP_REX;
3060 pbCodeBuf[off++] = 0x8a;
3061 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3062
3063 /* rol reg64, 8 */
3064 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3065 pbCodeBuf[off++] = 0xc1;
3066 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3067 pbCodeBuf[off++] = 8;
3068 }
3069
3070#elif defined(RT_ARCH_ARM64)
3071 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
3072 or
3073 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
3074 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3075 if (iGRegEx < 16)
3076 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
3077 else
3078 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
3079
3080#else
3081# error "Port me!"
3082#endif
3083 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3084
3085 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3086
3087#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3088 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
3089#endif
3090 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3091 return off;
3092}
3093
3094
3095
3096#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
3097 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
3098
3099/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
3100DECL_INLINE_THROW(uint32_t)
3101iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
3102{
3103 Assert(iGReg < 16);
3104 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3105 kIemNativeGstRegUse_ForUpdate);
3106#ifdef RT_ARCH_AMD64
3107 /* mov reg16, imm16 */
3108 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
3109 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3110 if (idxGstTmpReg >= 8)
3111 pbCodeBuf[off++] = X86_OP_REX_B;
3112 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
3113 pbCodeBuf[off++] = RT_BYTE1(uValue);
3114 pbCodeBuf[off++] = RT_BYTE2(uValue);
3115
3116#elif defined(RT_ARCH_ARM64)
3117 /* movk xdst, #uValue, lsl #0 */
3118 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3119 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3120
3121#else
3122# error "Port me!"
3123#endif
3124
3125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3126
3127#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3128 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3129#endif
3130 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3131 return off;
3132}
3133
3134
3135#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3136 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3137
3138/** Emits code for IEM_MC_STORE_GREG_U16. */
3139DECL_INLINE_THROW(uint32_t)
3140iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3141{
3142 Assert(iGReg < 16);
3143 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3144
3145 /*
3146     * If it's a constant value (unlikely), we treat this as an
3147 * IEM_MC_STORE_GREG_U16_CONST statement.
3148 */
3149 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3150 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3151 { /* likely */ }
3152 else
3153 {
3154 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3155 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3156 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3157 }
3158
3159 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3160 kIemNativeGstRegUse_ForUpdate);
3161
3162#ifdef RT_ARCH_AMD64
3163 /* mov reg16, reg16 or [mem16] */
3164 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3165 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3166 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3167 {
3168 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3169 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3170 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3171 pbCodeBuf[off++] = 0x8b;
3172 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3173 }
3174 else
3175 {
3176 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3177 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3178 if (idxGstTmpReg >= 8)
3179 pbCodeBuf[off++] = X86_OP_REX_R;
3180 pbCodeBuf[off++] = 0x8b;
3181 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3182 }
3183
3184#elif defined(RT_ARCH_ARM64)
3185 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3186 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3187 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3188 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3189 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3190
3191#else
3192# error "Port me!"
3193#endif
3194
3195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3196
3197#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3198 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3199#endif
3200 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3201 return off;
3202}
3203
3204
3205#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3206 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3207
3208/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3209DECL_INLINE_THROW(uint32_t)
3210iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3211{
3212 Assert(iGReg < 16);
3213 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3214 kIemNativeGstRegUse_ForFullWrite);
3215 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3216#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3217 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3218#endif
3219 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3220 return off;
3221}
3222
3223
3224#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3225 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3226
3227/** Emits code for IEM_MC_STORE_GREG_U32. */
3228DECL_INLINE_THROW(uint32_t)
3229iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3230{
3231 Assert(iGReg < 16);
3232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3233
3234 /*
3235     * If it's a constant value (unlikely), we treat this as an
3236 * IEM_MC_STORE_GREG_U32_CONST statement.
3237 */
3238 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3239 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3240 { /* likely */ }
3241 else
3242 {
3243 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3244 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3245 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3246 }
3247
3248 /*
3249     * For the rest we allocate a guest register for the variable and write
3250 * it to the CPUMCTX structure.
3251 */
3252 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3253#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3254 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3255#else
3256 RT_NOREF(idxVarReg);
3257#endif
3258#ifdef VBOX_STRICT
3259 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3260#endif
3261 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3262 return off;
3263}
3264
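/*
 * As on real hardware, a 32-bit GPR store implicitly zeroes bits 63:32 of the full
 * register, so e.g. storing 0x80000000 to EAX leaves the RAX shadow register holding
 * 0x0000000080000000; the strict build check above asserts that the acquired variable
 * register indeed has its upper half clear.
 */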
3265
3266#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3267 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3268
3269/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3270DECL_INLINE_THROW(uint32_t)
3271iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3272{
3273 Assert(iGReg < 16);
3274 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3275 kIemNativeGstRegUse_ForFullWrite);
3276 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3277#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3278 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3279#endif
3280 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3281 return off;
3282}
3283
3284
3285#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3286 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3287
3288#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3289 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3290
3291/** Emits code for IEM_MC_STORE_GREG_U64 (and the IEM_MC_STORE_GREG_I64 alias). */
3292DECL_INLINE_THROW(uint32_t)
3293iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3294{
3295 Assert(iGReg < 16);
3296 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3297
3298 /*
3299     * If it's a constant value (unlikely), we treat this as an
3300 * IEM_MC_STORE_GREG_U64_CONST statement.
3301 */
3302 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3303 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3304 { /* likely */ }
3305 else
3306 {
3307 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3308 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3309 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3310 }
3311
3312 /*
3313     * For the rest we allocate a guest register for the variable and write
3314 * it to the CPUMCTX structure.
3315 */
3316 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3317#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3318 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3319#else
3320 RT_NOREF(idxVarReg);
3321#endif
3322 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3323 return off;
3324}
3325
3326
3327#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3328 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3329
3330/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3331DECL_INLINE_THROW(uint32_t)
3332iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3333{
3334 Assert(iGReg < 16);
3335 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3336 kIemNativeGstRegUse_ForUpdate);
3337 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3338#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3339 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3340#endif
3341 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3342 return off;
3343}
3344
3345
3346#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3347#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
3348 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
3349
3350/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
3351DECL_INLINE_THROW(uint32_t)
3352iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
3353{
3354 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3355 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3356 Assert(iGRegLo < 16 && iGRegHi < 16);
3357
3358 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3359 kIemNativeGstRegUse_ForFullWrite);
3360 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3361 kIemNativeGstRegUse_ForFullWrite);
3362
3363 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3364 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
3365 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
3366 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
3367
3368 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3369 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3370 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3371 return off;
3372}
3373#endif
3374
3375
3376/*********************************************************************************************************************************
3377* General purpose register manipulation (add, sub). *
3378*********************************************************************************************************************************/
3379
3380#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
3381    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
3382
3383/** Emits code for IEM_MC_ADD_GREG_U16. */
3384DECL_INLINE_THROW(uint32_t)
3385iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3386{
3387 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3388 kIemNativeGstRegUse_ForUpdate);
3389
3390#ifdef RT_ARCH_AMD64
3391 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3392 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3393 if (idxGstTmpReg >= 8)
3394 pbCodeBuf[off++] = X86_OP_REX_B;
3395 if (uAddend == 1)
3396 {
3397 pbCodeBuf[off++] = 0xff; /* inc */
3398 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3399 }
3400 else
3401 {
3402 pbCodeBuf[off++] = 0x81;
3403 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3404 pbCodeBuf[off++] = uAddend;
3405 pbCodeBuf[off++] = 0;
3406 }
3407
3408#else
3409 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3410 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3411
3412    /* add tmp, gstgrp, uAddend */
3413 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3414
3415    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
3416 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3417
3418 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3419#endif
3420
3421 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3422
3423#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3424 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3425#endif
3426
3427 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3428 return off;
3429}
3430
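/*
 * Worked example for the 16-bit add above: only bits 15:0 of the guest register may
 * change, matching x86 16-bit operand semantics. With CX = 0xffff and an addend of 1
 * the result wraps to CX = 0x0000 while bits 63:16 of RCX are preserved; the ARM64 path
 * achieves this with the add-into-temporary + BFI pair, the AMD64 path with the 0x66
 * operand-size prefixed inc/add forms.
 */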
3431
3432#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3433 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3434
3435#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3436 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3437
3438/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3439DECL_INLINE_THROW(uint32_t)
3440iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3441{
3442 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3443 kIemNativeGstRegUse_ForUpdate);
3444
3445#ifdef RT_ARCH_AMD64
3446 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3447 if (f64Bit)
3448 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3449 else if (idxGstTmpReg >= 8)
3450 pbCodeBuf[off++] = X86_OP_REX_B;
3451 if (uAddend == 1)
3452 {
3453 pbCodeBuf[off++] = 0xff; /* inc */
3454 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3455 }
3456 else if (uAddend < 128)
3457 {
3458 pbCodeBuf[off++] = 0x83; /* add */
3459 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3460 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3461 }
3462 else
3463 {
3464 pbCodeBuf[off++] = 0x81; /* add */
3465 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3466 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3467 pbCodeBuf[off++] = 0;
3468 pbCodeBuf[off++] = 0;
3469 pbCodeBuf[off++] = 0;
3470 }
3471
3472#else
3473    /* add gstgrp, gstgrp, uAddend */
3474 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3475 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3476
3477#endif
3478
3479 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3480
3481#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3482 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3483#endif
3484
3485 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3486 return off;
3487}
3488
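/*
 * Encoding note for the AMD64 path above: an addend of 1 uses inc, other addends below
 * 128 use the sign-extending imm8 form (opcode 0x83), and 128..255 fall back to the
 * imm32 form (opcode 0x81) since the imm8 encoding would sign-extend them to negative
 * values. As uAddend is an uint8_t, the three high immediate bytes are always zero.
 */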
3489
3490
3491#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3492 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3493
3494/** Emits code for IEM_MC_SUB_GREG_U16. */
3495DECL_INLINE_THROW(uint32_t)
3496iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3497{
3498 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3499 kIemNativeGstRegUse_ForUpdate);
3500
3501#ifdef RT_ARCH_AMD64
3502 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3503 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3504 if (idxGstTmpReg >= 8)
3505 pbCodeBuf[off++] = X86_OP_REX_B;
3506 if (uSubtrahend == 1)
3507 {
3508 pbCodeBuf[off++] = 0xff; /* dec */
3509 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3510 }
3511 else
3512 {
3513 pbCodeBuf[off++] = 0x81;
3514 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3515 pbCodeBuf[off++] = uSubtrahend;
3516 pbCodeBuf[off++] = 0;
3517 }
3518
3519#else
3520 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3521 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3522
3523 /* sub tmp, gstgrp, uSubtrahend */
3524 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3525
3526    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
3527 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3528
3529 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3530#endif
3531
3532 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3533
3534#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3535 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3536#endif
3537
3538 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3539 return off;
3540}
3541
3542
3543#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3544 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3545
3546#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3547 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3548
3549/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3550DECL_INLINE_THROW(uint32_t)
3551iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3552{
3553 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3554 kIemNativeGstRegUse_ForUpdate);
3555
3556#ifdef RT_ARCH_AMD64
3557 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3558 if (f64Bit)
3559 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3560 else if (idxGstTmpReg >= 8)
3561 pbCodeBuf[off++] = X86_OP_REX_B;
3562 if (uSubtrahend == 1)
3563 {
3564 pbCodeBuf[off++] = 0xff; /* dec */
3565 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3566 }
3567 else if (uSubtrahend < 128)
3568 {
3569 pbCodeBuf[off++] = 0x83; /* sub */
3570 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3571 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3572 }
3573 else
3574 {
3575 pbCodeBuf[off++] = 0x81; /* sub */
3576 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3577 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3578 pbCodeBuf[off++] = 0;
3579 pbCodeBuf[off++] = 0;
3580 pbCodeBuf[off++] = 0;
3581 }
3582
3583#else
3584    /* sub gstgrp, gstgrp, uSubtrahend */
3585 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3586 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3587
3588#endif
3589
3590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3591
3592#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3593 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3594#endif
3595
3596 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3597 return off;
3598}
3599
3600
3601#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3602 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3603
3604#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3605 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3606
3607#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3608 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3609
3610#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3611 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3612
3613/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3614DECL_INLINE_THROW(uint32_t)
3615iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3616{
3617#ifdef VBOX_STRICT
3618 switch (cbMask)
3619 {
3620 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3621 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3622 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3623 case sizeof(uint64_t): break;
3624 default: AssertFailedBreak();
3625 }
3626#endif
3627
3628 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3629 kIemNativeGstRegUse_ForUpdate);
3630
3631 switch (cbMask)
3632 {
3633 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3634 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3635 break;
3636 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3637 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3638 break;
3639 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3640 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3641 break;
3642 case sizeof(uint64_t):
3643 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3644 break;
3645 default: AssertFailedBreak();
3646 }
3647
3648 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3649
3650#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3651 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3652#endif
3653
3654 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3655 return off;
3656}
3657
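/*
 * Mask widening example: IEM_MC_AND_GREG_U16(X86_GREG_xDX, 0x00ff) becomes a 64-bit AND
 * of the DX shadow register with 0xffffffffffff00ff, so DL is left as-is, DH is cleared
 * and bits 63:16 of RDX stay untouched. Only the 32-bit variant clears the upper half
 * of the register, matching the usual x86-64 32-bit operand behaviour.
 */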
3658
3659#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3660 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3661
3662#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3663 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3664
3665#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3666 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3667
3668#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3669 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3670
3671/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3672DECL_INLINE_THROW(uint32_t)
3673iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3674{
3675#ifdef VBOX_STRICT
3676 switch (cbMask)
3677 {
3678 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3679 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3680 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3681 case sizeof(uint64_t): break;
3682 default: AssertFailedBreak();
3683 }
3684#endif
3685
3686 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3687 kIemNativeGstRegUse_ForUpdate);
3688
3689 switch (cbMask)
3690 {
3691 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3692 case sizeof(uint16_t):
3693 case sizeof(uint64_t):
3694 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3695 break;
3696 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3697 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3698 break;
3699 default: AssertFailedBreak();
3700 }
3701
3702 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3703
3704#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3705 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3706#endif
3707
3708 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3709 return off;
3710}
3711
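/*
 * The 8, 16 and 64-bit OR cases can share the plain 64-bit emitter because OR'ing with
 * a mask that fits the operand width can never disturb bits outside it. The 32-bit case
 * is special: e.g. IEM_MC_OR_GREG_U32(X86_GREG_xAX, 1) sets bit 0 of EAX and, per the
 * usual x86-64 rules, also zeroes bits 63:32 of the RAX shadow register.
 */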
3712
3713/*********************************************************************************************************************************
3714* Local/Argument variable manipulation (add, sub, and, or). *
3715*********************************************************************************************************************************/
3716
3717#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3718 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3719
3720#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3721 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3722
3723#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3724 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3725
3726#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3727 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3728
3729
3730#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3731 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3732
3733#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3734 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3735
3736#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3737 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3738
3739/** Emits code for AND'ing a local and a constant value. */
3740DECL_INLINE_THROW(uint32_t)
3741iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3742{
3743#ifdef VBOX_STRICT
3744 switch (cbMask)
3745 {
3746 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3747 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3748 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3749 case sizeof(uint64_t): break;
3750 default: AssertFailedBreak();
3751 }
3752#endif
3753
3754 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3755 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3756
3757 if (cbMask <= sizeof(uint32_t))
3758 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3759 else
3760 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3761
3762 iemNativeVarRegisterRelease(pReNative, idxVar);
3763 return off;
3764}
3765
3766
3767#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3768 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3769
3770#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3771 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3772
3773#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3774 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3775
3776#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3777 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3778
3779/** Emits code for OR'ing a local and a constant value. */
3780DECL_INLINE_THROW(uint32_t)
3781iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3782{
3783#ifdef VBOX_STRICT
3784 switch (cbMask)
3785 {
3786 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3787 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3788 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3789 case sizeof(uint64_t): break;
3790 default: AssertFailedBreak();
3791 }
3792#endif
3793
3794 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3795 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3796
3797 if (cbMask <= sizeof(uint32_t))
3798 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3799 else
3800 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3801
3802 iemNativeVarRegisterRelease(pReNative, idxVar);
3803 return off;
3804}
3805
3806
3807#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3808 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3809
3810#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3811 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3812
3813#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3814 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3815
3816/** Emits code for reversing the byte order in a local value. */
3817DECL_INLINE_THROW(uint32_t)
3818iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3819{
3820 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3821 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3822
3823 switch (cbLocal)
3824 {
3825 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3826 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3827 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3828 default: AssertFailedBreak();
3829 }
3830
3831 iemNativeVarRegisterRelease(pReNative, idxVar);
3832 return off;
3833}
3834
3835
3836#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3837 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3838
3839#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3840 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3841
3842#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3843 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3844
3845/** Emits code for shifting left a local value. */
3846DECL_INLINE_THROW(uint32_t)
3847iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3848{
3849#ifdef VBOX_STRICT
3850 switch (cbLocal)
3851 {
3852 case sizeof(uint8_t): Assert(cShift < 8); break;
3853 case sizeof(uint16_t): Assert(cShift < 16); break;
3854 case sizeof(uint32_t): Assert(cShift < 32); break;
3855 case sizeof(uint64_t): Assert(cShift < 64); break;
3856 default: AssertFailedBreak();
3857 }
3858#endif
3859
3860 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3861 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3862
3863 if (cbLocal <= sizeof(uint32_t))
3864 {
3865 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3866 if (cbLocal < sizeof(uint32_t))
3867 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3868 cbLocal == sizeof(uint16_t)
3869 ? UINT32_C(0xffff)
3870 : UINT32_C(0xff));
3871 }
3872 else
3873 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3874
3875 iemNativeVarRegisterRelease(pReNative, idxVar);
3876 return off;
3877}
3878
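/*
 * Worked example (illustrative only): IEM_MC_SHL_LOCAL_S16(i16Tmp, 4) with
 * i16Tmp = 0x1234 shifts inside a 32-bit host register, producing 0x00012340.
 * Bits above the local's width must not leak into later uses, hence the
 * masking step above:
 *
 * @code
 *     uint32_t uHostReg = UINT32_C(0x1234) << 4;   // 0x00012340 in the host GPR
 *     uHostReg &= UINT32_C(0xffff);                // 0x2340, the value the MC block sees
 * @endcode
 *
 * 32-bit and 64-bit locals need no extra masking since the shift instruction
 * already truncates at the operand size.
 */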
3879
3880#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3881 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3882
3883#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3884 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3885
3886#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3887 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3888
3889/** Emits code for arithmetically shifting right a local value. */
3890DECL_INLINE_THROW(uint32_t)
3891iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3892{
3893#ifdef VBOX_STRICT
3894 switch (cbLocal)
3895 {
3896 case sizeof(int8_t): Assert(cShift < 8); break;
3897 case sizeof(int16_t): Assert(cShift < 16); break;
3898 case sizeof(int32_t): Assert(cShift < 32); break;
3899 case sizeof(int64_t): Assert(cShift < 64); break;
3900 default: AssertFailedBreak();
3901 }
3902#endif
3903
3904 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3905 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3906
3907 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3908 if (cbLocal == sizeof(uint8_t))
3909 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3910 else if (cbLocal == sizeof(uint16_t))
3911 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3912
3913 if (cbLocal <= sizeof(uint32_t))
3914 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3915 else
3916 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3917
3918 iemNativeVarRegisterRelease(pReNative, idxVar);
3919 return off;
3920}
3921
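/*
 * Worked example (illustrative only): IEM_MC_SAR_LOCAL_S16(i16Tmp, 4) with
 * i16Tmp = (int16_t)0x8000. The local sits zero-extended in a 32-bit host
 * register, so an arithmetic shift of 0x00008000 would yield 0x00000800 rather
 * than the expected 0xf800. Sign-extending to 32 bits first gives the correct
 * result:
 *
 * @code
 *     int32_t iHostReg = (int16_t)0x8000;   // 0xffff8000 after sign extension
 *     iHostReg >>= 4;                       // arithmetic shift: 0xfffff800, low 16 bits = 0xf800
 * @endcode
 */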
3922
3923#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3924 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3925
3926#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3927 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3928
3929#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3930 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3931
3932/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3933DECL_INLINE_THROW(uint32_t)
3934iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3935{
3936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3937 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3939 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3940
3941 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3942 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3943
3944 /* Need to sign extend the value. */
3945 if (cbLocal <= sizeof(uint32_t))
3946 {
3947/** @todo ARM64: In case of boredom, the extended add instruction can do the
3948 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3949 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3950
3951 switch (cbLocal)
3952 {
3953 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3954 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3955 default: AssertFailed();
3956 }
3957
3958 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3959 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3960 }
3961 else
3962 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3963
3964 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3965 iemNativeVarRegisterRelease(pReNative, idxVar);
3966 return off;
3967}
3968
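/*
 * Illustrative sketch (not part of the recompiler): IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR
 * adds a signed 16-bit local to a 64-bit effective address, so the emitter sign
 * extends into a temporary register before the 64-bit add:
 *
 * @code
 *     int16_t  i16Index  = -2;                     // hypothetical index local
 *     uint64_t GCPtrEff  = UINT64_C(0x1000);
 *     GCPtrEff += (uint64_t)(int64_t)i16Index;     // 0xffe, not 0x10ffe as a zero-extended add would give
 * @endcode
 *
 * The 64-bit variant skips the extension and adds the two registers directly.
 */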
3969
3970
3971/*********************************************************************************************************************************
3972* EFLAGS *
3973*********************************************************************************************************************************/
3974
3975#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3976# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3977#else
3978# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3979 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3980
3981DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3982{
3983 if (fEflOutput)
3984 {
3985 PVMCPUCC const pVCpu = pReNative->pVCpu;
3986# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3987 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3988 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3989 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3990# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3991 if (fEflOutput & (a_fEfl)) \
3992 { \
3993 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3994 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3995 else \
3996 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3997 } else do { } while (0)
3998# else
3999 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
4000 IEMLIVENESSBIT const LivenessClobbered =
4001 {
4002 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4003 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4004 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
4005 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
4006 };
4007 IEMLIVENESSBIT const LivenessDelayable =
4008 {
4009 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4010 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
4011 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4012 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
4013 };
4014# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
4015 if (fEflOutput & (a_fEfl)) \
4016 { \
4017 if (LivenessClobbered.a_fLivenessMember) \
4018 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
4019 else if (LivenessDelayable.a_fLivenessMember) \
4020 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
4021 else \
4022 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
4023 } else do { } while (0)
4024# endif
4025 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
4026 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
4027 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
4028 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
4029 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
4030 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
4031 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
4032# undef CHECK_FLAG_AND_UPDATE_STATS
4033 }
4034 RT_NOREF(fEflInput);
4035}
4036#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4037
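/*
 * Worked example (illustrative, extended liveness layout): a flag ends up in the
 * 'Skippable' bucket when its next use is a plain write, i.e. the WRITE bit is
 * set and READ, POT_XCPT_OR_CALL and OTHER are all clear; it is 'Delayable' when
 * WRITE and POT_XCPT_OR_CALL are set but READ and OTHER are clear (the value is
 * only needed if an exception or call actually materializes); everything else
 * counts as 'Required'.
 *
 *      WRITE  READ  POT_XCPT_OR_CALL  OTHER  ->  bucket
 *        1      0          0            0        Skippable
 *        1      0          1            0        Delayable
 *        1      1          0            0        Required
 */
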
4038#undef IEM_MC_FETCH_EFLAGS /* should not be used */
4039#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4040 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
4041
4042/** Handles IEM_MC_FETCH_EFLAGS_EX. */
4043DECL_INLINE_THROW(uint32_t)
4044iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
4045 uint32_t fEflInput, uint32_t fEflOutput)
4046{
4047 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
4048 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4049 RT_NOREF(fEflInput, fEflOutput);
4050
4051#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4052# ifdef VBOX_STRICT
4053 if ( pReNative->idxCurCall != 0
4054 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
4055 {
4056 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
4057 uint32_t const fBoth = fEflInput | fEflOutput;
4058# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
4059 AssertMsg( !(fBoth & (a_fElfConst)) \
4060 || (!(fEflInput & (a_fElfConst)) \
4061 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4062 : !(fEflOutput & (a_fElfConst)) \
4063 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4064 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
4065 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
4066 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
4067 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
4068 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
4069 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
4070 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
4071 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
4072 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
4073# undef ASSERT_ONE_EFL
4074 }
4075# endif
4076#endif
4077
4078 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4079
4080 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
4081 * the existing shadow copy. */
4082 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
4083 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4084 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
4085 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4086 return off;
4087}
4088
4089
4090
4091/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
4092 * start using it with custom native code emission (inlining assembly
4093 * instruction helpers). */
4094#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
4095#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4096 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4097 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
4098
4099#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
4100#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4101 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4102 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
4103
4104/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
4105DECL_INLINE_THROW(uint32_t)
4106iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
4107 bool fUpdateSkipping)
4108{
4109 RT_NOREF(fEflOutput);
4110 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
4111 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4112
4113#ifdef VBOX_STRICT
4114 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
4115 uint32_t offFixup = off;
4116 off = iemNativeEmitJnzToFixed(pReNative, off, off);
4117 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
4118 iemNativeFixupFixedJump(pReNative, offFixup, off);
4119
4120 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
4121 offFixup = off;
4122 off = iemNativeEmitJzToFixed(pReNative, off, off);
4123 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
4124 iemNativeFixupFixedJump(pReNative, offFixup, off);
4125
4126 /** @todo validate that only bits in the fEflOutput mask changed. */
4127#endif
4128
4129#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4130 if (fUpdateSkipping)
4131 {
4132 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4133 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4134 else
4135 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4136 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4137 }
4138#else
4139 RT_NOREF_PV(fUpdateSkipping);
4140#endif
4141
4142 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4143 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
4144 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4145 return off;
4146}
4147
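/*
 * Strict-build sanity sketch (illustrative only): the VBOX_STRICT block above
 * emits a breakpoint when the committed EFLAGS value violates the fixed bits,
 * conceptually:
 *
 * @code
 *     uint32_t const fEfl = ...;                                // value being committed
 *     if (!(fEfl & X86_EFL_RA1_MASK))                           // reserved-as-one bit must be set
 *         breakpoint(0x2001);
 *     if (fEfl & X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32)   // reserved-as-zero bits must be clear
 *         breakpoint(0x2002);
 * @endcode
 */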
4148
4149typedef enum IEMNATIVEMITEFLOP
4150{
4151 kIemNativeEmitEflOp_Invalid = 0,
4152 kIemNativeEmitEflOp_Set,
4153 kIemNativeEmitEflOp_Clear,
4154 kIemNativeEmitEflOp_Flip
4155} IEMNATIVEMITEFLOP;
4156
4157#define IEM_MC_SET_EFL_BIT(a_fBit) \
4158 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
4159
4160#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
4161 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
4162
4163#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
4164 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
4165
4166/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
4167DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
4168{
4169 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4170 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
4171
4172 switch (enmOp)
4173 {
4174 case kIemNativeEmitEflOp_Set:
4175 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4176 break;
4177 case kIemNativeEmitEflOp_Clear:
4178 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
4179 break;
4180 case kIemNativeEmitEflOp_Flip:
4181 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4182 break;
4183 default:
4184 AssertFailed();
4185 break;
4186 }
4187
4188 /** @todo No delayed writeback for EFLAGS right now. */
4189 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4190
4191 /* Free but don't flush the EFLAGS register. */
4192 iemNativeRegFreeTmp(pReNative, idxEflReg);
4193
4194 return off;
4195}
4196
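/*
 * Illustrative mapping (not part of the recompiler): each of the three MC
 * statements above reduces to one immediate ALU operation on the cached EFLAGS
 * register, e.g.:
 *
 * @code
 *     fEFlags |=  X86_EFL_CF;    // IEM_MC_SET_EFL_BIT(X86_EFL_CF),   as in STC
 *     fEFlags &= ~X86_EFL_CF;    // IEM_MC_CLEAR_EFL_BIT(X86_EFL_CF), as in CLC
 *     fEFlags ^=  X86_EFL_CF;    // IEM_MC_FLIP_EFL_BIT(X86_EFL_CF),  as in CMC
 * @endcode
 */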
4197
4198/*********************************************************************************************************************************
4199* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4200*********************************************************************************************************************************/
4201
4202#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4203 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4204
4205#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4206 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4207
4208#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4209 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4210
4211
4212/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4213 * IEM_MC_FETCH_SREG_ZX_U64. */
4214DECL_INLINE_THROW(uint32_t)
4215iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4216{
4217 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4218 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4219 Assert(iSReg < X86_SREG_COUNT);
4220
4221 /*
4222 * For now, we will not create a shadow copy of a selector. The rationale
4223 * is that since we do not recompile the popping and loading of segment
4224 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
4225 * and moving to registers, there is only a small chance that the shadow
4226 * copy will be accessed again before the register is reloaded. One
4227 * scenario would be nested calls in 16-bit code, but I doubt it's worth
4228 * the extra register pressure atm.
4229 *
4230 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4231 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
4232 * store scenario covered at present (r160730).
4233 */
4234 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4235 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4236 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4237 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4238 return off;
4239}
4240
4241
4242
4243/*********************************************************************************************************************************
4244* Register references. *
4245*********************************************************************************************************************************/
4246
4247#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4248 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4249
4250#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4251 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4252
4253/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4254DECL_INLINE_THROW(uint32_t)
4255iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4256{
4257 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4258 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4259 Assert(iGRegEx < 20);
4260
4261 if (iGRegEx < 16)
4262 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4263 else
4264 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4265
4266 /* If we've delayed writing back the register value, flush it now. */
4267 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4268
4269 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4270 if (!fConst)
4271 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4272
4273 return off;
4274}
4275
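/*
 * Note on the iGRegEx encoding used by the threaded variants above (values
 * derived from the code, shown for illustration): 0..15 reference the low byte
 * of GPR 0..15, while 16..19 reference the legacy high-byte registers of
 * GPR 0..3, hence the Assert(iGRegEx < 20) and the masking with 15:
 *
 * @code
 *     iGRegEx =  2  ->  low byte of GPR 2  (DL)
 *     iGRegEx = 16  ->  high byte of GPR 0 (AH),  16 & 15 = 0
 *     iGRegEx = 19  ->  high byte of GPR 3 (BH),  19 & 15 = 3
 * @endcode
 */
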
4276#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4277 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4278
4279#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4280 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4281
4282#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4283 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4284
4285#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4286 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4287
4288#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4289 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4290
4291#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4292 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4293
4294#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4295 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4296
4297#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4298 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4299
4300#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4301 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4302
4303#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4304 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4305
4306/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4307DECL_INLINE_THROW(uint32_t)
4308iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4309{
4310 Assert(iGReg < 16);
4311 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4312 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4313
4314 /* If we've delayed writing back the register value, flush it now. */
4315 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4316
4317 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4318 if (!fConst)
4319 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4320
4321 return off;
4322}
4323
4324
4325#undef IEM_MC_REF_EFLAGS /* should not be used. */
4326#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4327 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4328 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4329
4330/** Handles IEM_MC_REF_EFLAGS. */
4331DECL_INLINE_THROW(uint32_t)
4332iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4333{
4334 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4335 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4336
4337#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4338 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4339
4340 /* Updating the skipping according to the outputs is a little early, but
4341 we don't have any other hooks for references atm. */
4342 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4343 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4344 else if (fEflOutput & X86_EFL_STATUS_BITS)
4345 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4346 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4347#else
4348 RT_NOREF(fEflInput, fEflOutput);
4349#endif
4350
4351 /* If we've delayed writing back the register value, flush it now. */
4352 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4353
4354 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4355 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4356
4357 return off;
4358}
4359
4360
4361/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4362 * different code from threaded recompiler, maybe it would be helpful. For now
4363 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
4364#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4365
4366
4367#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4368 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4369
4370#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4371 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4372
4373#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4374 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4375
4376#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4377/* Just being paranoid here. */
4378# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
4379AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4380AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4381AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4382AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4383# endif
4384AssertCompileMemberOffset(X86XMMREG, au64, 0);
4385AssertCompileMemberOffset(X86XMMREG, au32, 0);
4386AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4387AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4388
4389# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4390 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4391# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4392 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4393# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4394 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4395# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4396 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4397#endif
4398
4399/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4400DECL_INLINE_THROW(uint32_t)
4401iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4402{
4403 Assert(iXReg < 16);
4404 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4405 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4406
4407 /* If we've delayed writing back the register value, flush it now. */
4408 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4409
4410#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4411 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4412 if (!fConst)
4413 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4414#else
4415 RT_NOREF(fConst);
4416#endif
4417
4418 return off;
4419}
4420
4421
4422
4423/*********************************************************************************************************************************
4424* Effective Address Calculation *
4425*********************************************************************************************************************************/
4426#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4427 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4428
4429/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4430 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4431DECL_INLINE_THROW(uint32_t)
4432iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4433 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4434{
4435 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4436
4437 /*
4438 * Handle the disp16 form with no registers first.
4439 *
4440 * Convert to an immediate value, as that'll delay the register allocation
4441 * and assignment till the memory access / call / whatever and we can use
4442 * a more appropriate register (or none at all).
4443 */
4444 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4445 {
4446 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4447 return off;
4448 }
4449
4450 /* Determine the displacement. */
4451 uint16_t u16EffAddr;
4452 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4453 {
4454 case 0: u16EffAddr = 0; break;
4455 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4456 case 2: u16EffAddr = u16Disp; break;
4457 default: AssertFailedStmt(u16EffAddr = 0);
4458 }
4459
4460 /* Determine the registers involved. */
4461 uint8_t idxGstRegBase;
4462 uint8_t idxGstRegIndex;
4463 switch (bRm & X86_MODRM_RM_MASK)
4464 {
4465 case 0:
4466 idxGstRegBase = X86_GREG_xBX;
4467 idxGstRegIndex = X86_GREG_xSI;
4468 break;
4469 case 1:
4470 idxGstRegBase = X86_GREG_xBX;
4471 idxGstRegIndex = X86_GREG_xDI;
4472 break;
4473 case 2:
4474 idxGstRegBase = X86_GREG_xBP;
4475 idxGstRegIndex = X86_GREG_xSI;
4476 break;
4477 case 3:
4478 idxGstRegBase = X86_GREG_xBP;
4479 idxGstRegIndex = X86_GREG_xDI;
4480 break;
4481 case 4:
4482 idxGstRegBase = X86_GREG_xSI;
4483 idxGstRegIndex = UINT8_MAX;
4484 break;
4485 case 5:
4486 idxGstRegBase = X86_GREG_xDI;
4487 idxGstRegIndex = UINT8_MAX;
4488 break;
4489 case 6:
4490 idxGstRegBase = X86_GREG_xBP;
4491 idxGstRegIndex = UINT8_MAX;
4492 break;
4493#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4494 default:
4495#endif
4496 case 7:
4497 idxGstRegBase = X86_GREG_xBX;
4498 idxGstRegIndex = UINT8_MAX;
4499 break;
4500 }
4501
4502 /*
4503 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4504 */
4505 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4506 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4507 kIemNativeGstRegUse_ReadOnly);
4508 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4509 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4510 kIemNativeGstRegUse_ReadOnly)
4511 : UINT8_MAX;
4512#ifdef RT_ARCH_AMD64
4513 if (idxRegIndex == UINT8_MAX)
4514 {
4515 if (u16EffAddr == 0)
4516 {
4517 /* movxz ret, base */
4518 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4519 }
4520 else
4521 {
4522 /* lea ret32, [base64 + disp32] */
4523 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4524 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4525 if (idxRegRet >= 8 || idxRegBase >= 8)
4526 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4527 pbCodeBuf[off++] = 0x8d;
4528 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4529 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4530 else
4531 {
4532 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4533 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4534 }
4535 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4536 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4537 pbCodeBuf[off++] = 0;
4538 pbCodeBuf[off++] = 0;
4539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4540
4541 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4542 }
4543 }
4544 else
4545 {
4546 /* lea ret32, [index64 + base64 (+ disp32)] */
4547 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4548 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4549 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4550 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4551 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4552 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4553 pbCodeBuf[off++] = 0x8d;
4554 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4555 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4556 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4557 if (bMod == X86_MOD_MEM4)
4558 {
4559 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4560 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4561 pbCodeBuf[off++] = 0;
4562 pbCodeBuf[off++] = 0;
4563 }
4564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4565 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4566 }
4567
4568#elif defined(RT_ARCH_ARM64)
4569 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4570 if (u16EffAddr == 0)
4571 {
4572 if (idxRegIndex == UINT8_MAX)
4573 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4574 else
4575 {
4576 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4577 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4578 }
4579 }
4580 else
4581 {
4582 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4583 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4584 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4585 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4586 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4587 else
4588 {
4589 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4590 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4591 }
4592 if (idxRegIndex != UINT8_MAX)
4593 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4594 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4595 }
4596
4597#else
4598# error "port me"
4599#endif
4600
4601 if (idxRegIndex != UINT8_MAX)
4602 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4603 iemNativeRegFreeTmp(pReNative, idxRegBase);
4604 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4605 return off;
4606}
4607
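/*
 * Worked decode example (illustrative only) for the 16-bit form above:
 * bRm = 0x42 has mod=1 and rm=2, so the register pair is BP+SI and the
 * displacement is a sign-extended disp8 taken from the low byte of u16Disp.
 * With u16Disp = 0xfe (-2) the emitted code computes:
 *
 * @code
 *     uint16_t const uEffAddr = (uint16_t)(uBp + uSi + (int16_t)(int8_t)0xfe);
 * @endcode
 *
 * The mod=0, rm=6 case (pure disp16) never reaches the register path; it is
 * turned into a constant variable right at the top of the function.
 */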
4608
4609#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4610 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4611
4612/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4613 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4614DECL_INLINE_THROW(uint32_t)
4615iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4616 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4617{
4618 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4619
4620 /*
4621 * Handle the disp32 form with no registers first.
4622 *
4623 * Convert to an immediate value, as that'll delay the register allocation
4624 * and assignment till the memory access / call / whatever and we can use
4625 * a more appropriate register (or none at all).
4626 */
4627 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4628 {
4629 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4630 return off;
4631 }
4632
4633 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
4634 uint32_t u32EffAddr = 0;
4635 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4636 {
4637 case 0: break;
4638 case 1: u32EffAddr = (int8_t)u32Disp; break;
4639 case 2: u32EffAddr = u32Disp; break;
4640 default: AssertFailed();
4641 }
4642
4643 /* Get the register (or SIB) value. */
4644 uint8_t idxGstRegBase = UINT8_MAX;
4645 uint8_t idxGstRegIndex = UINT8_MAX;
4646 uint8_t cShiftIndex = 0;
4647 switch (bRm & X86_MODRM_RM_MASK)
4648 {
4649 case 0: idxGstRegBase = X86_GREG_xAX; break;
4650 case 1: idxGstRegBase = X86_GREG_xCX; break;
4651 case 2: idxGstRegBase = X86_GREG_xDX; break;
4652 case 3: idxGstRegBase = X86_GREG_xBX; break;
4653 case 4: /* SIB */
4654 {
4655 /* index w/ scaling. */
4656 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4657 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4658 {
4659 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4660 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4661 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4662 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4663 case 4: cShiftIndex = 0; /*no index*/ break;
4664 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4665 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4666 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4667 }
4668
4669 /* base */
4670 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4671 {
4672 case 0: idxGstRegBase = X86_GREG_xAX; break;
4673 case 1: idxGstRegBase = X86_GREG_xCX; break;
4674 case 2: idxGstRegBase = X86_GREG_xDX; break;
4675 case 3: idxGstRegBase = X86_GREG_xBX; break;
4676 case 4:
4677 idxGstRegBase = X86_GREG_xSP;
4678 u32EffAddr += uSibAndRspOffset >> 8;
4679 break;
4680 case 5:
4681 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4682 idxGstRegBase = X86_GREG_xBP;
4683 else
4684 {
4685 Assert(u32EffAddr == 0);
4686 u32EffAddr = u32Disp;
4687 }
4688 break;
4689 case 6: idxGstRegBase = X86_GREG_xSI; break;
4690 case 7: idxGstRegBase = X86_GREG_xDI; break;
4691 }
4692 break;
4693 }
4694 case 5: idxGstRegBase = X86_GREG_xBP; break;
4695 case 6: idxGstRegBase = X86_GREG_xSI; break;
4696 case 7: idxGstRegBase = X86_GREG_xDI; break;
4697 }
4698
4699 /*
4700 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4701 * the start of the function.
4702 */
4703 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4704 {
4705 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4706 return off;
4707 }
4708
4709 /*
4710 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4711 */
4712 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4713 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4714 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4715 kIemNativeGstRegUse_ReadOnly);
4716 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4717 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4718 kIemNativeGstRegUse_ReadOnly);
4719
4720 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4721 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4722 {
4723 idxRegBase = idxRegIndex;
4724 idxRegIndex = UINT8_MAX;
4725 }
4726
4727#ifdef RT_ARCH_AMD64
4728 if (idxRegIndex == UINT8_MAX)
4729 {
4730 if (u32EffAddr == 0)
4731 {
4732 /* mov ret, base */
4733 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4734 }
4735 else
4736 {
4737 /* lea ret32, [base64 + disp32] */
4738 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4739 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4740 if (idxRegRet >= 8 || idxRegBase >= 8)
4741 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4742 pbCodeBuf[off++] = 0x8d;
4743 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4744 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4745 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4746 else
4747 {
4748 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4749 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4750 }
4751 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4752 if (bMod == X86_MOD_MEM4)
4753 {
4754 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4755 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4756 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4757 }
4758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4759 }
4760 }
4761 else
4762 {
4763 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4764 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4765 if (idxRegBase == UINT8_MAX)
4766 {
4767 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4768 if (idxRegRet >= 8 || idxRegIndex >= 8)
4769 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4770 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4771 pbCodeBuf[off++] = 0x8d;
4772 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4773 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4774 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4775 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4776 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4777 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4778 }
4779 else
4780 {
4781 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4782 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4783 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4784 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4785 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4786 pbCodeBuf[off++] = 0x8d;
4787 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4788 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4789 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4790 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4791 if (bMod != X86_MOD_MEM0)
4792 {
4793 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4794 if (bMod == X86_MOD_MEM4)
4795 {
4796 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4797 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4798 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4799 }
4800 }
4801 }
4802 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4803 }
4804
4805#elif defined(RT_ARCH_ARM64)
4806 if (u32EffAddr == 0)
4807 {
4808 if (idxRegIndex == UINT8_MAX)
4809 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4810 else if (idxRegBase == UINT8_MAX)
4811 {
4812 if (cShiftIndex == 0)
4813 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4814 else
4815 {
4816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4817 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4818 }
4819 }
4820 else
4821 {
4822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4823 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4824 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4825 }
4826 }
4827 else
4828 {
4829 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4830 {
4831 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4832 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4833 }
4834 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4835 {
4836 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4837 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4838 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4839 }
4840 else
4841 {
4842 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4843 if (idxRegBase != UINT8_MAX)
4844 {
4845 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4846 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4847 }
4848 }
4849 if (idxRegIndex != UINT8_MAX)
4850 {
4851 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4852 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4853 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4854 }
4855 }
4856
4857#else
4858# error "port me"
4859#endif
4860
4861 if (idxRegIndex != UINT8_MAX)
4862 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4863 if (idxRegBase != UINT8_MAX)
4864 iemNativeRegFreeTmp(pReNative, idxRegBase);
4865 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4866 return off;
4867}
4868
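/*
 * Worked decode example (illustrative only) for the 32-bit SIB path above:
 * bRm = 0x44 (mod=1, rm=4, so a SIB byte follows) with SIB = 0x98 in the low
 * byte of uSibAndRspOffset gives scale=2, index=3 (EBX) and base=0 (EAX).
 * With a disp8 of 0x10 the emitted code thus computes:
 *
 * @code
 *     uint32_t const uEffAddr = uEax + (uEbx << 2) + 0x10;
 * @endcode
 *
 * Bits 8..15 of uSibAndRspOffset only matter for the base=ESP case, where they
 * hold the fixed stack pointer adjustment for pop [esp] style accesses.
 */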
4869
4870#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4871 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4872 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4873
4874#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4875 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4876 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4877
4878#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4879 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4880 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4881
4882/**
4883 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4884 *
4885 * @returns New off.
4886 * @param pReNative The native recompiler state.
4887 * @param off The current code buffer offset.
4888 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4889 * bit 4 to REX.X. The two bits are part of the
4890 * REG sub-field, which isn't needed in this
4891 * function.
4892 * @param uSibAndRspOffset Two parts:
4893 * - The first 8 bits make up the SIB byte.
4894 * - The next 8 bits are the fixed RSP/ESP offset
4895 * in case of a pop [xSP].
4896 * @param u32Disp The displacement byte/word/dword, if any.
4897 * @param cbInstr The size of the fully decoded instruction. Used
4898 * for RIP relative addressing.
4899 * @param idxVarRet The result variable number.
4900 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4901 * when calculating the address.
4902 *
4903 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4904 */
4905DECL_INLINE_THROW(uint32_t)
4906iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4907 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4908{
4909 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4910
4911 /*
4912 * Special case the rip + disp32 form first.
4913 */
4914 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4915 {
4916#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4917 /* Need to take the current PC offset into account for the displacement; no need to flush here
4918 * as the PC is only read and no branching or helper calls are involved. */
4919 u32Disp += pReNative->Core.offPc;
4920#endif
4921
4922 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4923 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4924 kIemNativeGstRegUse_ReadOnly);
4925#ifdef RT_ARCH_AMD64
4926 if (f64Bit)
4927 {
4928 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4929 if ((int32_t)offFinalDisp == offFinalDisp)
4930 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4931 else
4932 {
4933 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4934 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4935 }
4936 }
4937 else
4938 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4939
4940#elif defined(RT_ARCH_ARM64)
4941 if (f64Bit)
4942 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4943 (int64_t)(int32_t)u32Disp + cbInstr);
4944 else
4945 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4946 (int32_t)u32Disp + cbInstr);
4947
4948#else
4949# error "Port me!"
4950#endif
4951 iemNativeRegFreeTmp(pReNative, idxRegPc);
4952 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4953 return off;
4954 }
4955
4956 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
4957 int64_t i64EffAddr = 0;
4958 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4959 {
4960 case 0: break;
4961 case 1: i64EffAddr = (int8_t)u32Disp; break;
4962 case 2: i64EffAddr = (int32_t)u32Disp; break;
4963 default: AssertFailed();
4964 }
4965
4966 /* Get the register (or SIB) value. */
4967 uint8_t idxGstRegBase = UINT8_MAX;
4968 uint8_t idxGstRegIndex = UINT8_MAX;
4969 uint8_t cShiftIndex = 0;
4970 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4971 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4972 else /* SIB: */
4973 {
4974 /* index w/ scaling. */
4975 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4976 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4977 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4978 if (idxGstRegIndex == 4)
4979 {
4980 /* no index */
4981 cShiftIndex = 0;
4982 idxGstRegIndex = UINT8_MAX;
4983 }
4984
4985 /* base */
4986 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4987 if (idxGstRegBase == 4)
4988 {
4989 /* pop [rsp] hack */
4990 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4991 }
4992 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4993 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4994 {
4995 /* mod=0 and base=5 -> disp32, no base reg. */
4996 Assert(i64EffAddr == 0);
4997 i64EffAddr = (int32_t)u32Disp;
4998 idxGstRegBase = UINT8_MAX;
4999 }
5000 }
5001
5002 /*
5003 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5004 * the start of the function.
5005 */
5006 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5007 {
5008 if (f64Bit)
5009 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
5010 else
5011 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
5012 return off;
5013 }
5014
5015 /*
5016 * Now emit code that calculates:
5017 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5018 * or if !f64Bit:
5019 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5020 */
5021 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5022 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5023 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5024 kIemNativeGstRegUse_ReadOnly);
5025 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5026 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5027 kIemNativeGstRegUse_ReadOnly);
5028
5029 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5030 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5031 {
5032 idxRegBase = idxRegIndex;
5033 idxRegIndex = UINT8_MAX;
5034 }
5035
5036#ifdef RT_ARCH_AMD64
5037 uint8_t bFinalAdj;
5038 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
5039 bFinalAdj = 0; /* likely */
5040 else
5041 {
5042 /* pop [rsp] with a problematic disp32 value. Split out the
5043 RSP offset and add it separately afterwards (bFinalAdj). */
5044 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
5045 Assert(idxGstRegBase == X86_GREG_xSP);
5046 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
5047 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
5048 Assert(bFinalAdj != 0);
5049 i64EffAddr -= bFinalAdj;
5050 Assert((int32_t)i64EffAddr == i64EffAddr);
5051 }
5052 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
5053//pReNative->pInstrBuf[off++] = 0xcc;
5054
5055 if (idxRegIndex == UINT8_MAX)
5056 {
5057 if (u32EffAddr == 0)
5058 {
5059 /* mov ret, base */
5060 if (f64Bit)
5061 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
5062 else
5063 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5064 }
5065 else
5066 {
5067 /* lea ret, [base + disp32] */
5068 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5069 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5070 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
5071 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5072 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5073 | (f64Bit ? X86_OP_REX_W : 0);
5074 pbCodeBuf[off++] = 0x8d;
5075 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5076 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5077 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5078 else
5079 {
5080 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5081 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5082 }
5083 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5084 if (bMod == X86_MOD_MEM4)
5085 {
5086 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5087 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5088 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5089 }
5090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5091 }
5092 }
5093 else
5094 {
5095 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5096 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5097 if (idxRegBase == UINT8_MAX)
5098 {
5099 /* lea ret, [(index64 << cShiftIndex) + disp32] */
5100 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
5101 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5102 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5103 | (f64Bit ? X86_OP_REX_W : 0);
5104 pbCodeBuf[off++] = 0x8d;
5105 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5106 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5107 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5108 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5109 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5110 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5111 }
5112 else
5113 {
5114 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5115 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5116 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5117 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5118 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5119 | (f64Bit ? X86_OP_REX_W : 0);
5120 pbCodeBuf[off++] = 0x8d;
5121 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5122 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5123 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5124 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5125 if (bMod != X86_MOD_MEM0)
5126 {
5127 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5128 if (bMod == X86_MOD_MEM4)
5129 {
5130 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5131 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5132 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5133 }
5134 }
5135 }
5136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5137 }
5138
5139 if (!bFinalAdj)
5140 { /* likely */ }
5141 else
5142 {
5143 Assert(f64Bit);
5144 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
5145 }
5146
5147#elif defined(RT_ARCH_ARM64)
5148 if (i64EffAddr == 0)
5149 {
5150 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5151 if (idxRegIndex == UINT8_MAX)
5152 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
5153 else if (idxRegBase != UINT8_MAX)
5154 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5155 f64Bit, false /*fSetFlags*/, cShiftIndex);
5156 else
5157 {
5158 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
5159 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
5160 }
5161 }
5162 else
5163 {
5164 if (f64Bit)
5165 { /* likely */ }
5166 else
5167 i64EffAddr = (int32_t)i64EffAddr;
5168
5169 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
5170 {
5171 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5172 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
5173 }
5174 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
5175 {
5176 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5177 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5178 }
5179 else
5180 {
5181 if (f64Bit)
5182 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5183 else
5184 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5185 if (idxRegBase != UINT8_MAX)
5186 {
5187 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5188 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5189 }
5190 }
5191 if (idxRegIndex != UINT8_MAX)
5192 {
5193 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5194 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5195 f64Bit, false /*fSetFlags*/, cShiftIndex);
5196 }
5197 }
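    /* Summary of the non-zero displacement handling above: displacements that fit into a 12-bit
       unsigned immediate are folded into a single ADD/SUB on the base register; anything larger
       is first materialized into the result register, after which the base register (and the
       shifted index register, if any) is added on top. */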
5198
5199#else
5200# error "port me"
5201#endif
5202
5203 if (idxRegIndex != UINT8_MAX)
5204 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5205 if (idxRegBase != UINT8_MAX)
5206 iemNativeRegFreeTmp(pReNative, idxRegBase);
5207 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5208 return off;
5209}
5210
5211
5212/*********************************************************************************************************************************
5213* Memory fetches and stores common *
5214*********************************************************************************************************************************/
5215
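/** Memory operation kinds for iemNativeEmitMemFetchStoreDataCommon below; the _Zx_ variants
 *  zero-extend the fetched value to the named destination width, while the _Sx_ variants
 *  sign-extend it. */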
5216typedef enum IEMNATIVEMITMEMOP
5217{
5218 kIemNativeEmitMemOp_Store = 0,
5219 kIemNativeEmitMemOp_Fetch,
5220 kIemNativeEmitMemOp_Fetch_Zx_U16,
5221 kIemNativeEmitMemOp_Fetch_Zx_U32,
5222 kIemNativeEmitMemOp_Fetch_Zx_U64,
5223 kIemNativeEmitMemOp_Fetch_Sx_U16,
5224 kIemNativeEmitMemOp_Fetch_Sx_U32,
5225 kIemNativeEmitMemOp_Fetch_Sx_U64
5226} IEMNATIVEMITMEMOP;
5227
5228/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5229 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5230 * (with iSegReg = UINT8_MAX). */
5231DECL_INLINE_THROW(uint32_t)
5232iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5233 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5234 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5235{
5236 /*
5237 * Assert sanity.
5238 */
5239 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5240 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5241 Assert( enmOp != kIemNativeEmitMemOp_Store
5242 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5243 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5244 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5245 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5246 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5247 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5248 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5249 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5250#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5251 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5252 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5253#else
5254 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5255#endif
5256 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5257#ifdef VBOX_STRICT
5258 if (iSegReg == UINT8_MAX)
5259 {
5260 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5261 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5262 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5263 switch (cbMem)
5264 {
5265 case 1:
5266 Assert( pfnFunction
5267 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5268 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5269 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5270 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5271 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5272 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5273 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5274 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5275 : UINT64_C(0xc000b000a0009000) ));
5276 break;
5277 case 2:
5278 Assert( pfnFunction
5279 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5280 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5281 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5282 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5283 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5284 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5285 : UINT64_C(0xc000b000a0009000) ));
5286 break;
5287 case 4:
5288 Assert( pfnFunction
5289 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5290 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5291 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5292 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5293 : UINT64_C(0xc000b000a0009000) ));
5294 break;
5295 case 8:
5296 Assert( pfnFunction
5297 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5298 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5299 : UINT64_C(0xc000b000a0009000) ));
5300 break;
5301#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5302 case sizeof(RTUINT128U):
5303 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5304 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5305 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5306 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5307 || ( enmOp == kIemNativeEmitMemOp_Store
5308 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5309 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5310 break;
5311 case sizeof(RTUINT256U):
5312 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5313 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5314 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5315 || ( enmOp == kIemNativeEmitMemOp_Store
5316 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5317 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5318 break;
5319#endif
5320 }
5321 }
5322 else
5323 {
5324 Assert(iSegReg < 6);
5325 switch (cbMem)
5326 {
5327 case 1:
5328 Assert( pfnFunction
5329 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5330 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5331 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5332 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5333 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5334 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5335 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5336 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5337 : UINT64_C(0xc000b000a0009000) ));
5338 break;
5339 case 2:
5340 Assert( pfnFunction
5341 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5342 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5343 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5344 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5345 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5346 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5347 : UINT64_C(0xc000b000a0009000) ));
5348 break;
5349 case 4:
5350 Assert( pfnFunction
5351 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5352 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5353 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5354 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5355 : UINT64_C(0xc000b000a0009000) ));
5356 break;
5357 case 8:
5358 Assert( pfnFunction
5359 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5360 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5361 : UINT64_C(0xc000b000a0009000) ));
5362 break;
5363#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5364 case sizeof(RTUINT128U):
5365 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5366 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5367 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5368 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5369 || ( enmOp == kIemNativeEmitMemOp_Store
5370 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5371 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5372 break;
5373 case sizeof(RTUINT256U):
5374 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5375 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5376 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5377 || ( enmOp == kIemNativeEmitMemOp_Store
5378 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5379 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5380 break;
5381#endif
5382 }
5383 }
5384#endif
5385
5386#ifdef VBOX_STRICT
5387 /*
5388 * Check that the fExec flags we've got make sense.
5389 */
5390 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5391#endif
5392
5393 /*
5394 * To keep things simple we have to commit any pending writes first as we
5395 * may end up making calls.
5396 */
5397 /** @todo we could postpone this till we make the call and reload the
5398 * registers after returning from the call. Not sure if that's sensible or
5399 * not, though. */
5400#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5401 off = iemNativeRegFlushPendingWrites(pReNative, off);
5402#else
5403 /* The program counter is treated differently for now. */
5404 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5405#endif
5406
5407#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5408 /*
5409 * Move/spill/flush stuff out of call-volatile registers.
5410 * This is the easy way out. We could contain this to the tlb-miss branch
5411 * by saving and restoring active stuff here.
5412 */
5413 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5414#endif
5415
5416 /*
5417 * Define labels and allocate the result register (trying for the return
5418 * register if we can).
5419 */
5420 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5421#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5422 uint8_t idxRegValueFetch = UINT8_MAX;
5423
5424 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5425 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5426 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5427 else
5428 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5429 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5430 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5431 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5432#else
5433 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5434 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5435 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5436 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5437#endif
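    /* For ordinary (non-SIMD) fetches the result variable tries to grab the host return register
       when it is free, so the TlbMiss path below can leave the helper's return value in place
       without an extra register-to-register move. */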
5438 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
5439
5440#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5441 uint8_t idxRegValueStore = UINT8_MAX;
5442
5443 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5444 idxRegValueStore = !TlbState.fSkip
5445 && enmOp == kIemNativeEmitMemOp_Store
5446 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5447 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5448 : UINT8_MAX;
5449 else
5450 idxRegValueStore = !TlbState.fSkip
5451 && enmOp == kIemNativeEmitMemOp_Store
5452 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5453 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5454 : UINT8_MAX;
5455
5456#else
5457 uint8_t const idxRegValueStore = !TlbState.fSkip
5458 && enmOp == kIemNativeEmitMemOp_Store
5459 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5460 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5461 : UINT8_MAX;
5462#endif
5463 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5464 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5465 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5466 : UINT32_MAX;
5467
5468 /*
5469 * Jump to the TLB lookup code.
5470 */
5471 if (!TlbState.fSkip)
5472 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
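    /*
     * Rough layout of the code emitted below when the lookup is not skipped:
     *          jmp     TlbLookup
     *      TlbMiss:
     *          ... call the pfnFunction helper ...
     *          jmp     TlbDone
     *      TlbLookup:
     *          ... inline TLB probe, branching to TlbMiss on a miss ...
     *          ... inline load/store on a hit ...
     *      TlbDone:
     */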
5473
5474 /*
5475 * TlbMiss:
5476 *
5477 * Call helper to do the fetching.
5478 * We flush all guest register shadow copies here.
5479 */
5480 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5481
5482#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5483 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5484#else
5485 RT_NOREF(idxInstr);
5486#endif
5487
5488#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5489 if (pReNative->Core.offPc)
5490 {
5491 /*
5492 * Update the program counter but restore it at the end of the TlbMiss branch.
5494          * This should allow delaying more program counter updates for the TlbLookup and hit paths,
5495          * which are hopefully much more frequent, reducing the number of memory accesses.
5495 */
5496 /* Allocate a temporary PC register. */
5497 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5498
5499 /* Perform the addition and store the result. */
5500 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5501 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5502
5503 /* Free and flush the PC register. */
5504 iemNativeRegFreeTmp(pReNative, idxPcReg);
5505 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5506 }
5507#endif
5508
5509#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5510 /* Save variables in volatile registers. */
5511 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5512 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5513 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5514 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5515#endif
5516
5517 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5518 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5519#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5520 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5521 {
5522 /*
5523 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5524 *
5525          * @note A host register was assigned to the variable above for the TlbLookup case and must not
5526          *       be freed here, or the value on the stack will not be synced back into that register
5527          *       further down the road because the variable no longer knows it has a register assigned.
5528 *
5529 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5530 * as it will be overwritten anyway.
5531 */
5532 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5533 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5534 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5535 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5536 }
5537 else
5538#endif
5539 if (enmOp == kIemNativeEmitMemOp_Store)
5540 {
5541 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5542 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5543#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5544 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5545#else
5546                                                             IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInVolatileRegs*/);
5547 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5548#endif
5549 }
5550
5551 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5552 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5553#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5554 fVolGregMask);
5555#else
5556                                                             fVolGregMask, true /*fSpilledVarsInVolatileRegs*/);
5557#endif
5558
5559 if (iSegReg != UINT8_MAX)
5560 {
5561 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5562 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5563 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5564 }
5565
5566 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5568
5569 /* Done setting up parameters, make the call. */
5570 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
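    /* Judging from the argument setup above, the helpers are effectively invoked as
       pfn(pVCpu, GCPtrMem) for flat fetches and pfn(pVCpu, GCPtrMem, uValue) for flat stores,
       with iSegReg inserted as the third argument (pushing uValue to the fourth) for the
       segmented variants; SIMD values are passed as a pointer to their stack copy instead. */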
5571
5572 /*
5573 * Put the result in the right register if this is a fetch.
5574 */
5575 if (enmOp != kIemNativeEmitMemOp_Store)
5576 {
5577#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5578 if ( cbMem == sizeof(RTUINT128U)
5579 || cbMem == sizeof(RTUINT256U))
5580 {
5581 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5582
5583 /* Sync the value on the stack with the host register assigned to the variable. */
5584 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5585 }
5586 else
5587#endif
5588 {
5589 Assert(idxRegValueFetch == pVarValue->idxReg);
5590 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5591 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5592 }
5593 }
5594
5595#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5596 /* Restore variables and guest shadow registers to volatile registers. */
5597 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5598 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5599#endif
5600
5601#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5602 if (pReNative->Core.offPc)
5603 {
5604 /*
5605 * Time to restore the program counter to its original value.
5606 */
5607 /* Allocate a temporary PC register. */
5608 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5609
5610 /* Restore the original value. */
5611 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5612 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5613
5614 /* Free and flush the PC register. */
5615 iemNativeRegFreeTmp(pReNative, idxPcReg);
5616 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5617 }
5618#endif
5619
5620#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5621 if (!TlbState.fSkip)
5622 {
5623 /* end of TlbMiss - Jump to the done label. */
5624 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5625 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5626
5627 /*
5628 * TlbLookup:
5629 */
5630 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5631 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5632 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5633
5634 /*
5635 * Emit code to do the actual storing / fetching.
5636 */
5637 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5638# ifdef VBOX_WITH_STATISTICS
5639 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5640 enmOp == kIemNativeEmitMemOp_Store
5641                                               ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5642                                               : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5643# endif
5644 switch (enmOp)
5645 {
5646 case kIemNativeEmitMemOp_Store:
5647 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5648 {
5649 switch (cbMem)
5650 {
5651 case 1:
5652 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5653 break;
5654 case 2:
5655 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5656 break;
5657 case 4:
5658 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5659 break;
5660 case 8:
5661 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5662 break;
5663#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5664 case sizeof(RTUINT128U):
5665 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5666 break;
5667 case sizeof(RTUINT256U):
5668 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5669 break;
5670#endif
5671 default:
5672 AssertFailed();
5673 }
5674 }
5675 else
5676 {
5677 switch (cbMem)
5678 {
5679 case 1:
5680 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5681 idxRegMemResult, TlbState.idxReg1);
5682 break;
5683 case 2:
5684 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5685 idxRegMemResult, TlbState.idxReg1);
5686 break;
5687 case 4:
5688 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5689 idxRegMemResult, TlbState.idxReg1);
5690 break;
5691 case 8:
5692 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5693 idxRegMemResult, TlbState.idxReg1);
5694 break;
5695 default:
5696 AssertFailed();
5697 }
5698 }
5699 break;
5700
5701 case kIemNativeEmitMemOp_Fetch:
5702 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5703 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5704 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5705 switch (cbMem)
5706 {
5707 case 1:
5708 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5709 break;
5710 case 2:
5711 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5712 break;
5713 case 4:
5714 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5715 break;
5716 case 8:
5717 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5718 break;
5719#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5720 case sizeof(RTUINT128U):
5721 /*
5722 * No need to sync back the register with the stack, this is done by the generic variable handling
5723 * code if there is a register assigned to a variable and the stack must be accessed.
5724 */
5725 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5726 break;
5727 case sizeof(RTUINT256U):
5728 /*
5729 * No need to sync back the register with the stack, this is done by the generic variable handling
5730 * code if there is a register assigned to a variable and the stack must be accessed.
5731 */
5732 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5733 break;
5734#endif
5735 default:
5736 AssertFailed();
5737 }
5738 break;
5739
5740 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5741 Assert(cbMem == 1);
5742 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5743 break;
5744
5745 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5746 Assert(cbMem == 1 || cbMem == 2);
5747 if (cbMem == 1)
5748 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5749 else
5750 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5751 break;
5752
5753 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5754 switch (cbMem)
5755 {
5756 case 1:
5757 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5758 break;
5759 case 2:
5760 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5761 break;
5762 case 4:
5763 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5764 break;
5765 default:
5766 AssertFailed();
5767 }
5768 break;
5769
5770 default:
5771 AssertFailed();
5772 }
5773
5774 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5775
5776 /*
5777 * TlbDone:
5778 */
5779 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5780
5781 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5782
5783# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5784 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5785 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5786# endif
5787 }
5788#else
5789 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5790#endif
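    /* Note: when TlbState.fSkip is set, no TlbLookup/TlbDone code is emitted above and the
       helper-call path runs unconditionally. */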
5791
5792 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5793 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5794 return off;
5795}
5796
5797
5798
5799/*********************************************************************************************************************************
5800* Memory fetches (IEM_MEM_FETCH_XXX). *
5801*********************************************************************************************************************************/
5802
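/* Convention for the wrappers below: the fAlignMask argument passed to the common worker is the
   natural alignment mask of the access, i.e. cbMem - 1, or 0 for byte accesses. */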
5803/* 8-bit segmented: */
5804#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5805 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5806 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5807 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5808
5809#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5810 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5811 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5812 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5813
5814#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5815 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5816 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5817 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5818
5819#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5820 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5821 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5822 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5823
5824#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5825 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5826 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5827 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5828
5829#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5830 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5831 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5832 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5833
5834#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5835 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5836 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5837 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5838
5839/* 16-bit segmented: */
5840#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5841 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5842 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5843 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5844
5845#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5846 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5847 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5848 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5849
5850#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5851 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5852 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5853 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5854
5855#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5857 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5858 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5859
5860#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5862 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5863 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5864
5865#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5867 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5868 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5869
5870
5871/* 32-bit segmented: */
5872#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5873 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5874 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5875 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5876
5877#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5878 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5879 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5880 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5881
5882#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5883 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5884 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5885 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5886
5887#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5888 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5889 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5890 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5891
5892#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
5893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
5894 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5895 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5896
5897#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
5898 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
5899 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5900 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5901
5902#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
5903 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
5904 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5905 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5906
5907AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5908#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5909 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5910 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5911 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5912
5913
5914/* 64-bit segmented: */
5915#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5917 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5918 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5919
5920AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5921#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5922 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5923 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5924 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5925
5926
5927/* 8-bit flat: */
5928#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5929 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5930 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5931 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5932
5933#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5935 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5936 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5937
5938#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5939 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5940 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5941 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5942
5943#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5944 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5945 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5946 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5947
5948#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5949 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5950 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5951 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5952
5953#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5954 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5955 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5956 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5957
5958#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5960 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5961 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5962
5963
5964/* 16-bit flat: */
5965#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5966 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5967 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5968 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5969
5970#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5971 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5972 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5973 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5974
5975#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5976 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5977 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5978 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5979
5980#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5981 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5982 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5983 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5984
5985#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5986 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5987 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5988 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5989
5990#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5991 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5992 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5993 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5994
5995/* 32-bit flat: */
5996#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5997 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5998 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5999 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6000
6001#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
6002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6003 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6004 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6005
6006#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
6007 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6008 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6009 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6010
6011#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
6012 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6013 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6014 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6015
6016 #define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
6017 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
6018 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6019 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6020
6021#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
6022 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
6023 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6024 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6025
6026#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
6027 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
6028 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6029 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6030
6031#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
6032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
6033 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6034 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6035
6036
6037/* 64-bit flat: */
6038#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
6039 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6040 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6041 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6042
6043#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
6044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
6045 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6046 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6047
6048#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6049/* 128-bit segmented: */
6050#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
6051 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6052 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6053 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
6054
6055#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
6056 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6057 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6058 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6059
6060AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
6061#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
6062 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
6063 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6064 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6065
6066#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
6067 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6068 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6069 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
6070
6071/* 128-bit flat: */
6072#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
6073 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6074 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6075 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
6076
6077#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
6078 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6079 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6080 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6081
6082#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
6083 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
6084 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6085 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6086
6087#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
6088 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6089 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6090 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
6091
6092/* 256-bit segmented: */
6093#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
6094 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6095 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6096 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6097
6098#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
6099 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6100 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6101 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6102
6103#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
6104 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6105 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6106 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6107
6108
6109/* 256-bit flat: */
6110#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
6111 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6112 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6113 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6114
6115#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
6116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6117 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6118 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6119
6120#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
6121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6122 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6123 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6124#endif
6125
6126
6127/*********************************************************************************************************************************
6128* Memory stores (IEM_MEM_STORE_XXX). *
6129*********************************************************************************************************************************/
6130
6131#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
6132 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
6133 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6134 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6135
6136#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
6137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
6138 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6139 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6140
6141#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
6142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
6143 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6144 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6145
6146#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
6147 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
6148 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6149 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6150
6151
6152#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
6153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
6154 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6155 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6156
6157#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
6158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
6159 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6160 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6161
6162#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
6163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
6164 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6165 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6166
6167#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
6168 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
6169 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6170 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6171
6172
6173#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
6174 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6175 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6176
6177#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
6178 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6179 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6180
6181#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
6182 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6183 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6184
6185#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
6186 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6187 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6188
6189
6190#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
6191 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6192 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6193
6194#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
6195 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6196 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6197
6198#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
6199 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6200 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6201
6202#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
6203 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6204 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6205
6206/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6207 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6208DECL_INLINE_THROW(uint32_t)
6209iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6210 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6211{
6212 /*
6213 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6214 * to do the grunt work.
6215 */
6216 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6217 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6218 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6219 pfnFunction, idxInstr);
6220 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6221 return off;
6222}
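/* For illustration only (hypothetical operands, not part of the build): a statement like
 *      IEM_MC_STORE_MEM_U16_CONST(X86_SREG_SS, GCPtrMem, 0xffff)
 * expands to roughly
 *      off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, 0xffff, X86_SREG_SS, GCPtrMem,
 *                                                 sizeof(uint16_t),
 *                                                 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr);
 * i.e. the constant is wrapped in a temporary variable and the generic store path above is reused. */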
6223
6224
6225#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6226# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6227 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6228 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6229 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6230
6231# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6233 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6234 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6235
6236# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6237 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6238 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6239 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6240
6241# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6242 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6243 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6244 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6245
6246
6247# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6248 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6249 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6250 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6251
6252# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6253 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6254 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6255 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6256
6257# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6258 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6259 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6260 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6261
6262# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6263 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6264 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6265 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6266#endif
6267
6268
6269
6270/*********************************************************************************************************************************
6271* Stack Accesses. *
6272*********************************************************************************************************************************/
6273/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
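/* The cBitsVarAndFlat argument of iemNativeEmitStackPush packs three fields
   byte-wise: byte 0 is the bit width of the value being pushed, byte 1 the
   flat-mode stack width (0 when SS has to be consulted, otherwise 32 or 64),
   and byte 2 a flag that is set when a segment register is being pushed. */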
6274#define IEM_MC_PUSH_U16(a_u16Value) \
6275 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6276 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6277#define IEM_MC_PUSH_U32(a_u32Value) \
6278 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6279 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6280#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6281 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6282 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6283#define IEM_MC_PUSH_U64(a_u64Value) \
6284 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6285 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6286
6287#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6288 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6289 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6290#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6291 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6292 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6293#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6294 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6295 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6296
6297#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6298 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6299 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6300#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6301 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6302 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
6303
6304
6305DECL_FORCE_INLINE_THROW(uint32_t)
6306iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6307{
6308 /* Use16BitSp: */
6309#ifdef RT_ARCH_AMD64
6310 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6311 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6312#else
6313 /* sub regeff, regrsp, #cbMem */
6314 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6315 /* and regeff, regeff, #0xffff */
6316 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6317 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6318    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
6319 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6320#endif
6321 return off;
6322}
6323
6324
6325DECL_FORCE_INLINE(uint32_t)
6326iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6327{
6328 /* Use32BitSp: */
6329 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6330 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6331 return off;
6332}
6333
6334
6335/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6336DECL_INLINE_THROW(uint32_t)
6337iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6338 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6339{
6340 /*
6341 * Assert sanity.
6342 */
6343 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6344 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6345#ifdef VBOX_STRICT
6346 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6347 {
6348 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6349 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6350 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6351 Assert( pfnFunction
6352 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6353 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6354 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6355 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6356 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6357 : UINT64_C(0xc000b000a0009000) ));
6358 }
6359 else
6360 Assert( pfnFunction
6361 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6362 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6363 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6364 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6365 : UINT64_C(0xc000b000a0009000) ));
6366#endif
6367
6368#ifdef VBOX_STRICT
6369 /*
6370 * Check that the fExec flags we've got make sense.
6371 */
6372 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6373#endif
6374
6375 /*
6376 * To keep things simple we have to commit any pending writes first as we
6377 * may end up making calls.
6378 */
6379 /** @todo we could postpone this till we make the call and reload the
6380 * registers after returning from the call. Not sure if that's sensible or
6381 * not, though. */
6382 off = iemNativeRegFlushPendingWrites(pReNative, off);
6383
6384 /*
6385 * First we calculate the new RSP and the effective stack pointer value.
6386 * For 64-bit mode and flat 32-bit these two are the same.
6387 * (Code structure is very similar to that of PUSH)
6388 */
6389 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6390 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6391 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
6392 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6393 ? cbMem : sizeof(uint16_t);
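    /* Note: Intel CPUs write only the low 16 bits of the stack slot when a
       segment register is pushed with a 32-bit operand size outside 16-bit
       code, which is why cbMemAccess drops to 2 bytes in that case; in 16-bit
       code the full width is written and the upper half comes from EFLAGS
       (see the 10890XE quirk handling in the TLB-hit store code below). */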
6394 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6395 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6396 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6397 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6398 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6399 if (cBitsFlat != 0)
6400 {
6401 Assert(idxRegEffSp == idxRegRsp);
6402 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6403 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6404 if (cBitsFlat == 64)
6405 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6406 else
6407 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6408 }
6409 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6410 {
6411 Assert(idxRegEffSp != idxRegRsp);
6412 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6413 kIemNativeGstRegUse_ReadOnly);
6414#ifdef RT_ARCH_AMD64
6415 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6416#else
6417 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6418#endif
6419 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6420 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6421 offFixupJumpToUseOtherBitSp = off;
6422 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6423 {
6424 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6425 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6426 }
6427 else
6428 {
6429 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6430 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6431 }
6432 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6433 }
6434 /* SpUpdateEnd: */
6435 uint32_t const offLabelSpUpdateEnd = off;
6436
6437 /*
6438     * Okay, now prepare for TLB lookup and jump to the lookup code (or straight
6439     * to the TlbMiss code if we're skipping the lookup).
6440 */
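    /* What follows: a jump to the TlbLookup code emitted at the end of this
       sequence (or straight to TlbMiss when the lookup is skipped).  On a TLB
       hit the store is done inline and execution falls into TlbDone; on a miss
       the TlbMiss block calls the pfnFunction helper and then jumps to TlbDone,
       after which the new RSP value is committed. */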
6441 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6442 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6443 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6444 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6445 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6446 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6447 : UINT32_MAX;
6448 uint8_t const idxRegValue = !TlbState.fSkip
6449 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6450 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6451 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6452 : UINT8_MAX;
6453 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6454
6455
6456 if (!TlbState.fSkip)
6457 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6458 else
6459 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6460
6461 /*
6462 * Use16BitSp:
6463 */
6464 if (cBitsFlat == 0)
6465 {
6466#ifdef RT_ARCH_AMD64
6467 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6468#else
6469 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6470#endif
6471 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6472 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6473 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6474 else
6475 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6476 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6477 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6478 }
6479
6480 /*
6481 * TlbMiss:
6482 *
6483 * Call helper to do the pushing.
6484 */
6485 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6486
6487#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6488 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6489#else
6490 RT_NOREF(idxInstr);
6491#endif
6492
6493 /* Save variables in volatile registers. */
6494 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6495 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6496 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6497 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6498 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6499
6500 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6501 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6502 {
6503 /* Swap them using ARG0 as temp register: */
6504 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6505 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6506 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6507 }
6508 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6509 {
6510 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6511 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6512 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6513
6514 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6515 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6516 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6517 }
6518 else
6519 {
6520 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6521 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6522
6523 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6524 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6525 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6526 }
6527
6528 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6529 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6530
6531 /* Done setting up parameters, make the call. */
6532 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6533
6534 /* Restore variables and guest shadow registers to volatile registers. */
6535 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6536 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6537
6538#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6539 if (!TlbState.fSkip)
6540 {
6541 /* end of TlbMiss - Jump to the done label. */
6542 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6543 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6544
6545 /*
6546 * TlbLookup:
6547 */
6548 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6549 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6550
6551 /*
6552 * Emit code to do the actual storing / fetching.
6553 */
6554 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6555# ifdef VBOX_WITH_STATISTICS
6556 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6557 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6558# endif
6559 if (idxRegValue != UINT8_MAX)
6560 {
6561 switch (cbMemAccess)
6562 {
6563 case 2:
6564 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6565 break;
6566 case 4:
6567 if (!fIsIntelSeg)
6568 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6569 else
6570 {
6571                         /* intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
6572                            PUSH FS in real mode, so we have to try to emulate that here.
6573 We borrow the now unused idxReg1 from the TLB lookup code here. */
6574 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6575 kIemNativeGstReg_EFlags);
6576 if (idxRegEfl != UINT8_MAX)
6577 {
6578#ifdef RT_ARCH_AMD64
6579 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6580 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6581 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6582#else
6583 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6584 off, TlbState.idxReg1, idxRegEfl,
6585 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6586#endif
6587 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6588 }
6589 else
6590 {
6591 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6592 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6593 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6594 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6595 }
6596 /* ASSUMES the upper half of idxRegValue is ZERO. */
6597 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6598 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6599 }
6600 break;
6601 case 8:
6602 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6603 break;
6604 default:
6605 AssertFailed();
6606 }
6607 }
6608 else
6609 {
6610 switch (cbMemAccess)
6611 {
6612 case 2:
6613 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6614 idxRegMemResult, TlbState.idxReg1);
6615 break;
6616 case 4:
6617 Assert(!fIsSegReg);
6618 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6619 idxRegMemResult, TlbState.idxReg1);
6620 break;
6621 case 8:
6622 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6623 break;
6624 default:
6625 AssertFailed();
6626 }
6627 }
6628
6629 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6630 TlbState.freeRegsAndReleaseVars(pReNative);
6631
6632 /*
6633 * TlbDone:
6634 *
6635 * Commit the new RSP value.
6636 */
6637 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6638 }
6639#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6640
6641#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6642 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6643#endif
6644 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6645 if (idxRegEffSp != idxRegRsp)
6646 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6647
6648    /* The value variable is implicitly flushed. */
6649 if (idxRegValue != UINT8_MAX)
6650 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6651 iemNativeVarFreeLocal(pReNative, idxVarValue);
6652
6653 return off;
6654}
6655
6656
6657
6658/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
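/* These use the same byte-wise packing as the PUSH macros above, only without
   the segment register flag in byte 2. */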
6659#define IEM_MC_POP_GREG_U16(a_iGReg) \
6660 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6661 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6662#define IEM_MC_POP_GREG_U32(a_iGReg) \
6663 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6664 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6665#define IEM_MC_POP_GREG_U64(a_iGReg) \
6666 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6667 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6668
6669#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6670 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6671 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6672#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6673 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6674 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6675
6676#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6677 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6678 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6679#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6680 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6681 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
6682
6683
6684DECL_FORCE_INLINE_THROW(uint32_t)
6685iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6686 uint8_t idxRegTmp)
6687{
6688 /* Use16BitSp: */
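    /* Note: Unlike the push helper, the old SP value is needed as the effective
       address here, so the arm64 code builds the incremented SP in idxRegTmp and
       only then merges it back into bits 15:0 of idxRegRsp; idxRegTmp is unused
       in the AMD64 variant. */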
6689#ifdef RT_ARCH_AMD64
6690 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6691 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6692 RT_NOREF(idxRegTmp);
6693#else
6694 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6695 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6696 /* add tmp, regrsp, #cbMem */
6697 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6698 /* and tmp, tmp, #0xffff */
6699 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6700 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6701    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6702 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6703#endif
6704 return off;
6705}
6706
6707
6708DECL_FORCE_INLINE(uint32_t)
6709iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6710{
6711 /* Use32BitSp: */
6712 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6713 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6714 return off;
6715}
6716
6717
6718/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6719DECL_INLINE_THROW(uint32_t)
6720iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6721 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6722{
6723 /*
6724 * Assert sanity.
6725 */
6726 Assert(idxGReg < 16);
6727#ifdef VBOX_STRICT
6728 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6729 {
6730 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6731 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6732 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6733 Assert( pfnFunction
6734 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6735 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6736 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6737 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6738 : UINT64_C(0xc000b000a0009000) ));
6739 }
6740 else
6741 Assert( pfnFunction
6742 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6743 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6744 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6745 : UINT64_C(0xc000b000a0009000) ));
6746#endif
6747
6748#ifdef VBOX_STRICT
6749 /*
6750 * Check that the fExec flags we've got make sense.
6751 */
6752 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6753#endif
6754
6755 /*
6756 * To keep things simple we have to commit any pending writes first as we
6757 * may end up making calls.
6758 */
6759 off = iemNativeRegFlushPendingWrites(pReNative, off);
6760
6761 /*
6762 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6763 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6764 * directly as the effective stack pointer.
6765 * (Code structure is very similar to that of PUSH)
6766 */
6767 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6768 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6769 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6770 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6771 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6772 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6773 * will be the resulting register value. */
6774 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6775
6776 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6777 if (cBitsFlat != 0)
6778 {
6779 Assert(idxRegEffSp == idxRegRsp);
6780 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6781 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6782 }
6783 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6784 {
6785 Assert(idxRegEffSp != idxRegRsp);
6786 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6787 kIemNativeGstRegUse_ReadOnly);
6788#ifdef RT_ARCH_AMD64
6789 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6790#else
6791 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6792#endif
6793 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6794 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6795 offFixupJumpToUseOtherBitSp = off;
6796 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6797 {
6798/** @todo can skip idxRegRsp updating when popping ESP. */
6799 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6800 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6801 }
6802 else
6803 {
6804 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6805 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6806 }
6807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6808 }
6809 /* SpUpdateEnd: */
6810 uint32_t const offLabelSpUpdateEnd = off;
6811
6812 /*
6813     * Okay, now prepare for TLB lookup and jump to the lookup code (or straight
6814     * to the TlbMiss code if we're skipping the lookup).
6815 */
6816 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6817 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6818 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6819 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6820 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6821 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6822 : UINT32_MAX;
6823
6824 if (!TlbState.fSkip)
6825 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6826 else
6827 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6828
6829 /*
6830 * Use16BitSp:
6831 */
6832 if (cBitsFlat == 0)
6833 {
6834#ifdef RT_ARCH_AMD64
6835 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6836#else
6837 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6838#endif
6839 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6840 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6841 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6842 else
6843 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6844 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6845 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6846 }
6847
6848 /*
6849 * TlbMiss:
6850 *
6851     * Call helper to do the popping.
6852 */
6853 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6854
6855#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6856 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6857#else
6858 RT_NOREF(idxInstr);
6859#endif
6860
6861 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6862 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6863 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6864 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6865
6866
6867 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6868 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6869 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6870
6871 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6872 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6873
6874 /* Done setting up parameters, make the call. */
6875 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6876
6877 /* Move the return register content to idxRegMemResult. */
6878 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6879 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6880
6881 /* Restore variables and guest shadow registers to volatile registers. */
6882 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6883 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6884
6885#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6886 if (!TlbState.fSkip)
6887 {
6888 /* end of TlbMiss - Jump to the done label. */
6889 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6890 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6891
6892 /*
6893 * TlbLookup:
6894 */
6895 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6896 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6897
6898 /*
6899     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
6900 */
6901 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6902# ifdef VBOX_WITH_STATISTICS
6903 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6904 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6905# endif
6906 switch (cbMem)
6907 {
6908 case 2:
6909 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6910 break;
6911 case 4:
6912 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6913 break;
6914 case 8:
6915 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6916 break;
6917 default:
6918 AssertFailed();
6919 }
6920
6921 TlbState.freeRegsAndReleaseVars(pReNative);
6922
6923 /*
6924 * TlbDone:
6925 *
6926     * Set the new RSP value (FLAT accesses need to calculate it first) and
6927 * commit the popped register value.
6928 */
6929 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6930 }
6931#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6932
6933 if (idxGReg != X86_GREG_xSP)
6934 {
6935 /* Set the register. */
6936 if (cbMem >= sizeof(uint32_t))
6937 {
6938#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6939 AssertMsg( pReNative->idxCurCall == 0
6940 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6941 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6942#endif
6943 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6944#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6945 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6946#endif
6947#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6948 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6949 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6950#endif
6951 }
6952 else
6953 {
6954 Assert(cbMem == sizeof(uint16_t));
6955 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6956 kIemNativeGstRegUse_ForUpdate);
6957 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6958#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6959 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6960#endif
6961 iemNativeRegFreeTmp(pReNative, idxRegDst);
6962 }
6963
6964 /* Complete RSP calculation for FLAT mode. */
6965 if (idxRegEffSp == idxRegRsp)
6966 {
6967 if (cBitsFlat == 64)
6968 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6969 else
6970 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6971 }
6972 }
6973 else
6974 {
6975        /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
6976 if (cbMem == sizeof(uint64_t))
6977 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6978 else if (cbMem == sizeof(uint32_t))
6979 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6980 else
6981 {
6982 if (idxRegEffSp == idxRegRsp)
6983 {
6984 if (cBitsFlat == 64)
6985 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6986 else
6987 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6988 }
6989 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6990 }
6991 }
6992
6993#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6994 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6995#endif
6996
6997 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6998 if (idxRegEffSp != idxRegRsp)
6999 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7000 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7001
7002 return off;
7003}
7004
7005
7006
7007/*********************************************************************************************************************************
7008* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
7009*********************************************************************************************************************************/
7010
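/* All of the mapping macros below funnel into iemNativeEmitMemMapCommon: cbMem
   gives the access size, fAccess the IEM_ACCESS_DATA_* mode, fAlignMask the
   required alignment (size - 1 for the integer types, while the 80-bit float
   and BCD variants only require 8-byte alignment), and pfnFunction is the
   helper invoked on the TLB-miss path. */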
7011#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7012 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7013 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
7014 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
7015
7016#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7017 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7018 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7019 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
7020
7021#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7022 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7023 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7024 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
7025
7026#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7027 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7028 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7029 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
7030
7031
7032#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7033 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7034 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7035 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
7036
7037#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7038 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7039 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7040 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
7041
7042#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7043 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7044 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7045 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7046
7047#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7048 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7049 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7050 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
7051
7052#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7053 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
7054 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7055 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7056
7057
7058#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7059 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7060 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7061 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
7062
7063#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7065 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7066 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
7067
7068#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7070 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7071 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7072
7073#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7074 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7075 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7076 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
7077
7078#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7079 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
7080 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7081 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7082
7083
7084#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7085 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7086 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7087 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
7088
7089#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7090 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7091 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7092 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
7093#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7094 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7095 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7096 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7097
7098#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7099 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7100 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7101 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
7102
7103#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7104 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
7105 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7106 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7107
7108
7109#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7110 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7111 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7112 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
7113
7114#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7115 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7116 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7117 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
7118
7119
7120#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7121 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7122 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7123 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
7124
7125#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7126 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7127 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7128 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
7129
7130#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7132 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7133 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
7134
7135#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7137 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7138 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
7139
7140
7141
7142#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7143 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7144 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
7145 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
7146
7147#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7148 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7149 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7150 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
7151
7152#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7153 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7154 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7155 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
7156
7157#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7158 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7159 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7160 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
7161
7162
7163#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7165 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7166 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
7167
7168#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7169 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7170 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7171 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
7172
7173#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7174 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7175 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7176 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7177
7178#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7179 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7180 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7181 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
7182
7183#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
7184 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
7185 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7186 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7187
7188
7189#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7190 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7191 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7192 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
7193
7194#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7195 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7196 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7197 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
7198
7199#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7201 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7202 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7203
7204#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7205 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7206 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7207 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7208
7209#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7210 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7211 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7212 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7213
7214
7215#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7216 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7217 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7218 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7219
7220#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7222 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7223 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7224
7225#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7226 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7227 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7228 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7229
7230#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7231 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7232 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7233 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7234
7235#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7236 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7237 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7238 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7239
7240
7241#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7242 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7243 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7244 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7245
7246#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7247 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7248 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7249 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7250
7251
7252#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7253 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7254 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7255 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7256
7257#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7258 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7259 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7260 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7261
7262#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7263 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7264 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7265 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7266
7267#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7268 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7269 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7270 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
7271
7272
7273DECL_INLINE_THROW(uint32_t)
7274iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7275 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7276 uintptr_t pfnFunction, uint8_t idxInstr)
7277{
7278 /*
7279 * Assert sanity.
7280 */
7281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7282 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7283 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7284 && pVarMem->cbVar == sizeof(void *),
7285 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7286
7287 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7288 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7289 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7290 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7291 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7292
7293 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7295 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7296 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7297 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7298
7299 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7300
7301 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7302
7303#ifdef VBOX_STRICT
7304# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7305 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7306 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7307 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7308 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7309# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7310 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7311 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7312 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
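    /* The two helper-picker macros above translate the access flags into the
       TLB-miss helper the caller is expected to pass, so the asserts below can
       check that pfnFunction matches fAccess and cbMem for both the flat and
       the segmented variants. */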
7313
7314 if (iSegReg == UINT8_MAX)
7315 {
7316 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7317 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7318 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7319 switch (cbMem)
7320 {
7321 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7322 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7323 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7324 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7325 case 10:
7326 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7327 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7328 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7329 break;
7330 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7331# if 0
7332 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7333 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7334# endif
7335 default: AssertFailed(); break;
7336 }
7337 }
7338 else
7339 {
7340 Assert(iSegReg < 6);
7341 switch (cbMem)
7342 {
7343 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7344 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7345 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7346 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7347 case 10:
7348 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7349 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7350 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7351 break;
7352 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7353# if 0
7354 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7355 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7356# endif
7357 default: AssertFailed(); break;
7358 }
7359 }
7360# undef IEM_MAP_HLP_FN
7361# undef IEM_MAP_HLP_FN_NO_AT
7362#endif
7363
7364#ifdef VBOX_STRICT
7365 /*
7366 * Check that the fExec flags we've got make sense.
7367 */
7368 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7369#endif
7370
7371 /*
7372 * To keep things simple we have to commit any pending writes first as we
7373 * may end up making calls.
7374 */
7375 off = iemNativeRegFlushPendingWrites(pReNative, off);
7376
7377#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7378 /*
7379 * Move/spill/flush stuff out of call-volatile registers.
7380 * This is the easy way out. We could contain this to the tlb-miss branch
7381 * by saving and restoring active stuff here.
7382 */
7383 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7384 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7385#endif
7386
7387 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7388 while the tlb-miss codepath will temporarily put it on the stack.
7389       Set the type to stack here so we don't need to do it twice below. */
7390 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7391 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7392 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7393 * lookup is done. */
7394
7395 /*
7396 * Define labels and allocate the result register (trying for the return
7397 * register if we can).
7398 */
7399 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7400 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7401 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7402 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
7403 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7404 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7405 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7406 : UINT32_MAX;
7407//off=iemNativeEmitBrk(pReNative, off, 0);
7408 /*
7409 * Jump to the TLB lookup code.
7410 */
7411 if (!TlbState.fSkip)
7412 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7413
7414 /*
7415 * TlbMiss:
7416 *
7417 * Call helper to do the fetching.
7418 * We flush all guest register shadow copies here.
7419 */
7420 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7421
7422#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7423 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7424#else
7425 RT_NOREF(idxInstr);
7426#endif
7427
7428#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7429 /* Save variables in volatile registers. */
7430 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7431 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7432#endif
7433
7434 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7435 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
7436#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7437 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7438#else
7439 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7440#endif
7441
7442 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7443 if (iSegReg != UINT8_MAX)
7444 {
7445 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7446 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7447 }
7448
7449 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
7450 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7451 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7452
7453 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7454 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7455
7456 /* Done setting up parameters, make the call. */
7457 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7458
7459 /*
7460 * Put the output in the right registers.
7461 */
7462 Assert(idxRegMemResult == pVarMem->idxReg);
7463 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7464 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7465
7466#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7467 /* Restore variables and guest shadow registers to volatile registers. */
7468 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7469 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7470#endif
7471
7472 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7473 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7474
7475#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7476 if (!TlbState.fSkip)
7477 {
7478        /* End of TlbMiss - jump to the done label. */
7479 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7480 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7481
7482 /*
7483 * TlbLookup:
7484 */
7485 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7486 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7487# ifdef VBOX_WITH_STATISTICS
7488 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7489 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7490# endif
7491
7492 /* [idxVarUnmapInfo] = 0; */
7493 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
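        /* Note: a zero unmap info value is what lets the commit-and-unmap code
           (iemNativeEmitMemCommitAndUnmap below) skip the unmap helper call
           entirely for direct (TLB hit) mappings. */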
7494
7495 /*
7496 * TlbDone:
7497 */
7498 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7499
7500 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7501
7502# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7503 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7504 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7505# endif
7506 }
7507#else
7508 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7509#endif
7510
7511 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7512 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7513
7514 return off;
7515}
7516
7517
7518#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7519 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7520 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7521
7522#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7523 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7524 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7525
7526#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7527 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7528 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7529
7530#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7531 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7532 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7533
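/*
 * Note: The IEM_MC_MEM_COMMIT_AND_UNMAP_XXX MCs are the counterpart to the
 *       memory mapping emitter above and consume the a_bMapInfo value it
 *       produced; when that value is zero (direct/TLB-hit mapping) the
 *       generated code skips the unmap helper call altogether.
 */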
7534DECL_INLINE_THROW(uint32_t)
7535iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7536 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7537{
7538 /*
7539 * Assert sanity.
7540 */
7541 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7542#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7543 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7544#endif
7545 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7546 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7547 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7548#ifdef VBOX_STRICT
7549 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7550 {
7551 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7552 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7553 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7554 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7555 case IEM_ACCESS_TYPE_WRITE:
7556 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7557 case IEM_ACCESS_TYPE_READ:
7558 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7559 default: AssertFailed();
7560 }
7561#else
7562 RT_NOREF(fAccess);
7563#endif
7564
7565 /*
7566 * To keep things simple we have to commit any pending writes first as we
7567 * may end up making calls (there shouldn't be any at this point, so this
7568 * is just for consistency).
7569 */
7570 /** @todo we could postpone this till we make the call and reload the
7571 * registers after returning from the call. Not sure if that's sensible or
7572 * not, though. */
7573 off = iemNativeRegFlushPendingWrites(pReNative, off);
7574
7575 /*
7576 * Move/spill/flush stuff out of call-volatile registers.
7577 *
7578 * We exclude any register holding the bUnmapInfo variable, as we'll be
7579 * checking it after returning from the call and will free it afterwards.
7580 */
7581 /** @todo save+restore active registers and maybe guest shadows in miss
7582 * scenario. */
7583 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7584 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7585
7586 /*
7587 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7588 * to call the unmap helper function.
7589 *
7590     * The likelihood of it being zero is higher than for the TLB hit when doing
7591     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
7592 * access should also end up with a mapping that won't need special unmapping.
7593 */
7594 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7595 * should speed up things for the pure interpreter as well when TLBs
7596 * are enabled. */
7597#ifdef RT_ARCH_AMD64
7598 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7599 {
7600 /* test byte [rbp - xxx], 0ffh */
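        /* 0xf6 /0 ib is TEST r/m8, imm8; testing the stack slot directly avoids
           loading the variable into a register just for this zero check (the
           ModRM/displacement bytes are presumably supplied by
           iemNativeEmitGprByBpDisp with reg=0). */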
7601 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7602 pbCodeBuf[off++] = 0xf6;
7603 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7604 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7605 pbCodeBuf[off++] = 0xff;
7606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7607 }
7608 else
7609#endif
7610 {
7611 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7612 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7613 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7614 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7615 }
7616 uint32_t const offJmpFixup = off;
7617 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
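    /* The branch target isn't known yet; the jz gets patched to point past the
       helper call by the iemNativeFixupFixedJump() at the end of this function. */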
7618
7619 /*
7620 * Call the unmap helper function.
7621 */
7622#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7623 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7624#else
7625 RT_NOREF(idxInstr);
7626#endif
7627
7628 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7629 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7630 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7631
7632 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7634
7635 /* Done setting up parameters, make the call. */
7636 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7637
7638     /* The bUnmapInfo variable is implicitly freed by these MCs. */
7639 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7640
7641 /*
7642 * Done, just fixup the jump for the non-call case.
7643 */
7644 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7645
7646 return off;
7647}
7648
7649
7650
7651/*********************************************************************************************************************************
7652* State and Exceptions *
7653*********************************************************************************************************************************/
7654
7655#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7656#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7657
7658#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7659#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7660#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7661
7662#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7663#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7664#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7665
7666
7667DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7668{
7669 /** @todo this needs a lot more work later. */
7670 RT_NOREF(pReNative, fForChange);
7671 return off;
7672}
7673
7674
7675
7676/*********************************************************************************************************************************
7677* Emitters for FPU related operations. *
7678*********************************************************************************************************************************/
7679
7680#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7681 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7682
7683/** Emits code for IEM_MC_FETCH_FCW. */
7684DECL_INLINE_THROW(uint32_t)
7685iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7686{
7687 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7688 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7689
7690 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7691
7692 /* Allocate a temporary FCW register. */
7693 /** @todo eliminate extra register */
7694 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7695 kIemNativeGstRegUse_ReadOnly);
7696
7697 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7698
7699 /* Free but don't flush the FCW register. */
7700 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7701 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7702
7703 return off;
7704}
7705
7706
7707#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7708 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7709
7710/** Emits code for IEM_MC_FETCH_FSW. */
7711DECL_INLINE_THROW(uint32_t)
7712iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7713{
7714 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7715 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7716
7717 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7718 /* Allocate a temporary FSW register. */
7719 /** @todo eliminate extra register */
7720 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7721 kIemNativeGstRegUse_ReadOnly);
7722
7723 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7724
7725 /* Free but don't flush the FSW register. */
7726 iemNativeRegFreeTmp(pReNative, idxFswReg);
7727 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7728
7729 return off;
7730}
7731
7732
7733
7734#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7735
7736
7737/*********************************************************************************************************************************
7738* Emitters for SSE/AVX specific operations. *
7739*********************************************************************************************************************************/
7740
7741#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7742 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7743
7744/** Emits code for IEM_MC_COPY_XREG_U128. */
7745DECL_INLINE_THROW(uint32_t)
7746iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7747{
7748 /* This is a nop if the source and destination register are the same. */
7749 if (iXRegDst != iXRegSrc)
7750 {
7751 /* Allocate destination and source register. */
7752 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7753 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7754 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7755 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7756
7757 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7758
7759 /* Free but don't flush the source and destination register. */
7760 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7761 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7762 }
7763
7764 return off;
7765}
7766
7767
7768#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7769 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7770
7771/** Emits code for IEM_MC_FETCH_XREG_U128. */
7772DECL_INLINE_THROW(uint32_t)
7773iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7774{
7775 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7776 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7777
7778 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7779 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7780
7781 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7782
7783 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7784
7785 /* Free but don't flush the source register. */
7786 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7787 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7788
7789 return off;
7790}
7791
7792
7793#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7794 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7795
7796#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
7797 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
7798
7799 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
7800DECL_INLINE_THROW(uint32_t)
7801iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7802{
7803 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7804 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7805
7806 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7807 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7808
7809 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7810 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7811
7812 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7813
7814 /* Free but don't flush the source register. */
7815 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7816 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7817
7818 return off;
7819}
7820
7821
7822#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7823 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7824
7825#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
7826 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
7827
7828/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
7829DECL_INLINE_THROW(uint32_t)
7830iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7831{
7832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7833 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7834
7835 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7836 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7837
7838 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7839 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7840
7841 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7842
7843 /* Free but don't flush the source register. */
7844 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7845 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7846
7847 return off;
7848}
7849
7850
7851#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
7852 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
7853
7854/** Emits code for IEM_MC_FETCH_XREG_U16. */
7855DECL_INLINE_THROW(uint32_t)
7856iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7857{
7858 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7859 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7860
7861 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7862 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7863
7864 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7866
7867 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7868
7869 /* Free but don't flush the source register. */
7870 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7871 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7872
7873 return off;
7874}
7875
7876
7877#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
7878 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
7879
7880/** Emits code for IEM_MC_FETCH_XREG_U8. */
7881DECL_INLINE_THROW(uint32_t)
7882iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7883{
7884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7885 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7886
7887 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7888 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7889
7890 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7891 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7892
7893 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7894
7895 /* Free but don't flush the source register. */
7896 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7897 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7898
7899 return off;
7900}
7901
7902
7903#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7904 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7905
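/* IEM_MC_STORE_XREG_XMM simply reuses the U128 emitter below, which is only
   valid as long as X86XMMREG is exactly 128 bits wide - hence the assertion. */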
7906AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7907#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7908 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7909
7910
7911/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7912DECL_INLINE_THROW(uint32_t)
7913iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7914{
7915 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7916 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7917
7918 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7919 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7920 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7921
7922 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7923
7924 /* Free but don't flush the source register. */
7925 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7926 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7927
7928 return off;
7929}
7930
7931
7932#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7933 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
7934
7935#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7936 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
7937
7938#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
7939 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
7940
7941#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
7942 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
7943
7944#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
7945 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
7946
7947#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
7948 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
7949
7950 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8
7950  *  and IEM_MC_STORE_XREG_R64/IEM_MC_STORE_XREG_R32. */
7951DECL_INLINE_THROW(uint32_t)
7952iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
7953 uint8_t cbLocal, uint8_t iElem)
7954{
7955 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7956 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
7957
7958#ifdef VBOX_STRICT
7959 switch (cbLocal)
7960 {
7961 case sizeof(uint64_t): Assert(iElem < 2); break;
7962 case sizeof(uint32_t): Assert(iElem < 4); break;
7963 case sizeof(uint16_t): Assert(iElem < 8); break;
7964 case sizeof(uint8_t): Assert(iElem < 16); break;
7965 default: AssertFailed();
7966 }
7967#endif
7968
7969 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7970 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7971 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7972
7973 switch (cbLocal)
7974 {
7975 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7976 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7977 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7978 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7979 default: AssertFailed();
7980 }
7981
7982 /* Free but don't flush the source register. */
7983 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7984 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7985
7986 return off;
7987}
7988
7989
7990#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7991 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7992
7993/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7994DECL_INLINE_THROW(uint32_t)
7995iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7996{
7997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7998 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7999
8000 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8001 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8002 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8003
8004     /* Zero the vector register first, then store the 64-bit value into the lower 64 bits. */
8005 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
8006 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
8007
8008 /* Free but don't flush the source register. */
8009 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8010 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8011
8012 return off;
8013}
8014
8015
8016#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
8017 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
8018
8019/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
8020DECL_INLINE_THROW(uint32_t)
8021iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
8022{
8023 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8024 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8025
8026 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8027 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8028 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8029
8030 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
8031 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
8032 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
8033
8034 /* Free but don't flush the source register. */
8035 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8036 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8037
8038 return off;
8039}
8040
8041
8042#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
8043 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
8044
8045/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
8046DECL_INLINE_THROW(uint32_t)
8047iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
8048 uint8_t idxSrcVar, uint8_t iDwSrc)
8049{
8050 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8051 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8052
8053 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8054 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8055 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8056
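    /* Copy the selected dword from the source value into the destination
       register, bouncing it through the fixed temporary GPR. */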
8057 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
8058 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
8059
8060 /* Free but don't flush the destination register. */
8061 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8062 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8063
8064 return off;
8065}
8066
8067
8068#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8069 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8070
8071/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
8072DECL_INLINE_THROW(uint32_t)
8073iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8074{
8075 /*
8076     * The iYRegSrc == iYRegDst case needs to be treated differently here: if iYRegDst gets allocated first for the full write,
8077     * it won't load the actual value from CPUMCTX, and when iYRegSrc is allocated afterwards it gets duplicated from the already
8078     * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
8079 */
8080 if (iYRegDst != iYRegSrc)
8081 {
8082 /* Allocate destination and source register. */
8083 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8084 kIemNativeGstSimdRegLdStSz_256,
8085 kIemNativeGstRegUse_ForFullWrite);
8086 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8087 kIemNativeGstSimdRegLdStSz_Low128,
8088 kIemNativeGstRegUse_ReadOnly);
8089
8090 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8091 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8092
8093 /* Free but don't flush the source and destination register. */
8094 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8095 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8096 }
8097 else
8098 {
8099 /* This effectively only clears the upper 128-bits of the register. */
8100 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8101 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8102
8103 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8104
8105 /* Free but don't flush the destination register. */
8106 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8107 }
8108
8109 return off;
8110}
8111
8112
8113#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8114 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8115
8116/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
8117DECL_INLINE_THROW(uint32_t)
8118iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8119{
8120 /*
8121     * The iYRegSrc == iYRegDst case needs to be treated differently here: if iYRegDst gets allocated first for the full write,
8122     * it won't load the actual value from CPUMCTX, and when iYRegSrc is allocated afterwards it gets duplicated from the already
8123     * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
8124     * For iYRegSrc == iYRegDst the operation would effectively only clear the upper 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
8125 */
8126 if (iYRegDst != iYRegSrc)
8127 {
8128 /* Allocate destination and source register. */
8129 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8130 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8131 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8132 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8133
8134 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8135
8136 /* Free but don't flush the source and destination register. */
8137 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8138 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8139 }
8140
8141 return off;
8142}
8143
8144
8145#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
8146 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
8147
8148/** Emits code for IEM_MC_FETCH_YREG_U128. */
8149DECL_INLINE_THROW(uint32_t)
8150iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
8151{
8152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8153 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8154
8155 Assert(iDQWord <= 1);
8156 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8157 iDQWord == 1
8158 ? kIemNativeGstSimdRegLdStSz_High128
8159 : kIemNativeGstSimdRegLdStSz_Low128,
8160 kIemNativeGstRegUse_ReadOnly);
8161
8162 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8163 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8164
8165 if (iDQWord == 1)
8166 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8167 else
8168 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8169
8170 /* Free but don't flush the source register. */
8171 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8172 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8173
8174 return off;
8175}
8176
8177
8178#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
8179 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
8180
8181/** Emits code for IEM_MC_FETCH_YREG_U64. */
8182DECL_INLINE_THROW(uint32_t)
8183iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
8184{
8185 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8186 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8187
8188 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8189 iQWord >= 2
8190 ? kIemNativeGstSimdRegLdStSz_High128
8191 : kIemNativeGstSimdRegLdStSz_Low128,
8192 kIemNativeGstRegUse_ReadOnly);
8193
8194 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8195 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8196
8197 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8198
8199 /* Free but don't flush the source register. */
8200 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8201 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8202
8203 return off;
8204}
8205
8206
8207#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
8208 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
8209
8210/** Emits code for IEM_MC_FETCH_YREG_U32. */
8211DECL_INLINE_THROW(uint32_t)
8212iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
8213{
8214 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8215 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8216
8217 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8218 iDWord >= 4
8219 ? kIemNativeGstSimdRegLdStSz_High128
8220 : kIemNativeGstSimdRegLdStSz_Low128,
8221 kIemNativeGstRegUse_ReadOnly);
8222
8223 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8224 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8225
8226 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8227
8228 /* Free but don't flush the source register. */
8229 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8230 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8231
8232 return off;
8233}
8234
8235
8236#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8237 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8238
8239/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8240DECL_INLINE_THROW(uint32_t)
8241iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8242{
8243 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8244 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8245
8246 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8247
8248 /* Free but don't flush the register. */
8249 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8250
8251 return off;
8252}
8253
8254
8255#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8256 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8257
8258/** Emits code for IEM_MC_STORE_YREG_U128. */
8259DECL_INLINE_THROW(uint32_t)
8260iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8261{
8262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8263 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8264
8265 Assert(iDQword <= 1);
8266 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8267 iDQword == 0
8268 ? kIemNativeGstSimdRegLdStSz_Low128
8269 : kIemNativeGstSimdRegLdStSz_High128,
8270 kIemNativeGstRegUse_ForFullWrite);
8271
8272 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8273
8274 if (iDQword == 0)
8275 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8276 else
8277 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8278
8279 /* Free but don't flush the source register. */
8280 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8281 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8282
8283 return off;
8284}
8285
8286
8287#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8288 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8289
8290/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8291DECL_INLINE_THROW(uint32_t)
8292iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8293{
8294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8295 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8296
8297 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8298 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8299
8300 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8301
8302 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8303 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8304
8305 /* Free but don't flush the source register. */
8306 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8307 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8308
8309 return off;
8310}
8311
8312
8313#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8314 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8315
8316/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8317DECL_INLINE_THROW(uint32_t)
8318iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8319{
8320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8321 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8322
8323 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8324 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8325
8326 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8327
8328 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8329 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8330
8331 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8332 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8333
8334 return off;
8335}
8336
8337
8338#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8339 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8340
8341/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8342DECL_INLINE_THROW(uint32_t)
8343iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8344{
8345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8346 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8347
8348 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8349 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8350
8351 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8352
8353 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8354 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8355
8356 /* Free but don't flush the source register. */
8357 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8358 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8359
8360 return off;
8361}
8362
8363
8364#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8365 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8366
8367/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8368DECL_INLINE_THROW(uint32_t)
8369iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8370{
8371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8372 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8373
8374 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8375 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8376
8377 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8378
8379 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8380 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8381
8382 /* Free but don't flush the source register. */
8383 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8384 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8385
8386 return off;
8387}
8388
8389
8390#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8391 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8392
8393/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8394DECL_INLINE_THROW(uint32_t)
8395iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8396{
8397 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8398 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8399
8400 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8401 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8402
8403 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8404
8405 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8406 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8407
8408 /* Free but don't flush the source register. */
8409 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8410 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8411
8412 return off;
8413}
8414
8415
8416#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8417 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8418
8419/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8420DECL_INLINE_THROW(uint32_t)
8421iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8422{
8423 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8424 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8425
8426 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8427 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8428
8429 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8430
8431 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8432
8433 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8434 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8435
8436 return off;
8437}
8438
8439
8440#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8441 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8442
8443/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8444DECL_INLINE_THROW(uint32_t)
8445iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8446{
8447 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8448 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8449
8450 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8451 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8452
8453 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8454
8455 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8456
8457 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8458 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8459
8460 return off;
8461}
8462
8463
8464#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8465 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8466
8467/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8468DECL_INLINE_THROW(uint32_t)
8469iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8470{
8471 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8472 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8473
8474 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8475 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8476
8477 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8478
8479 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8480
8481 /* Free but don't flush the source register. */
8482 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8483 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8484
8485 return off;
8486}
8487
8488
8489#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8490 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8491
8492/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8493DECL_INLINE_THROW(uint32_t)
8494iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8495{
8496 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8497 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8498
8499 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8500 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8501
8502 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8503
8504 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8505
8506 /* Free but don't flush the source register. */
8507 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8508 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8509
8510 return off;
8511}
8512
8513
8514#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8515 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8516
8517/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8518DECL_INLINE_THROW(uint32_t)
8519iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8520{
8521 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8522 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8523
8524 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8525 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8526
8527 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8528
8529 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8530
8531 /* Free but don't flush the source register. */
8532 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8533 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8534
8535 return off;
8536}
8537
8538
8539#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8540 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8541
8542/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8543DECL_INLINE_THROW(uint32_t)
8544iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8545{
8546 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8547 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8548
8549 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8550 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8551
8552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8553
8554 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8555 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8556
8557 /* Free but don't flush the source register. */
8558 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8559 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8560
8561 return off;
8562}
8563
8564
8565#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8566 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8567
8568/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8569DECL_INLINE_THROW(uint32_t)
8570iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8571{
8572 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8573 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8574
8575 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8576 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8577
8578 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8579
8580 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8581 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8582
8583 /* Free but don't flush the source register. */
8584 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8585 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8586
8587 return off;
8588}
8589
8590
8591#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8592 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8593
8594/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8595DECL_INLINE_THROW(uint32_t)
8596iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8597{
8598 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8599 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8600
8601 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8602 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8603 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8604 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8605 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8606
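    /* Resulting layout: dst[63:0] = u64Local, dst[127:64] = SrcHx[127:64],
       dst[255:128] = 0. */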
8607 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8608 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8609 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8610
8611 /* Free but don't flush the source and destination registers. */
8612 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8613 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8614 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8615
8616 return off;
8617}


#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
    off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)

/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst   = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                            kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
    uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
                                                                            kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
    off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);

    /* Free but don't flush the source and destination registers. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}
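/* Register layout produced by the merge emitted above (copy the low 128 bits of the
   SrcHx register, overwrite qword 1 with the local value, then zero the upper half):
       YMM[a_iYRegDst] = { SrcHx.qword0, a_u64Local, 0, 0 }     (qwords 0..3) */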


#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
    off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)


/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
{
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);

    /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
    if (bImm8Mask & RT_BIT(0))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
    if (bImm8Mask & RT_BIT(1))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
    if (bImm8Mask & RT_BIT(2))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
    if (bImm8Mask & RT_BIT(3))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);

    return off;
}
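/* Example: a_bMask = 0x05 (bits 0 and 2 set) zeroes dwords 0 and 2 of XMM[a_iXReg] and
   leaves dwords 1 and 3 untouched; only the low 128 bits are loaded for update.  Note
   that a_bMask is a compile-time immediate here, so the checks above are resolved while
   recompiling rather than emitted as guest code. */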


#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
    off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)


/** Emits code for IEM_MC_FETCH_YREG_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);

    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);

    /* Free but don't flush the source register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);

    return off;
}


#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
    off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)


/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);

    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);

    /* Free but don't flush the destination register, and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
    off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)


/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
                                      uint8_t idxSrcVar, uint8_t iDwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iDwDst < 4
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);

    /* Free but don't flush the destination register; also free the temporary GPR and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}
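/* Note: a_iDwDst indexes the full 256-bit destination (dwords 0..7) and also determines
   whether the low or the high 128-bit half of the guest register is loaded for update
   above; a_iDwSrc indexes the 256-bit source variable held in a SIMD register. */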


#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
    off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)


/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
                                      uint8_t idxSrcVar, uint8_t iQwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);

    /* Free but don't flush the destination register; also free the temporary GPR and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}
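/* Note: a_iQwDst indexes the full 256-bit destination (qwords 0..3) and determines whether
   the low or the high 128-bit half of the guest register is loaded for update above;
   a_iQwSrc indexes the 256-bit source variable. */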


#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)


/** Emits code for IEM_MC_STORE_YREG_U64. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);

    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);

    /* Free but don't flush the destination register, and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}
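/* Unlike the ZX_VLMAX variants above, this only updates the addressed qword of
   YMM[a_iYRegDst]; the other qword of the selected 128-bit half and the remaining half
   are left as they are, i.e. no zero extension is emitted. */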


#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)

/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
{
    RT_NOREF(pReNative, iYReg);
    /** @todo Needs to be implemented when support for AVX-512 is added. */
    return off;
}



/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_SSE_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
{
    /* Grab the MXCSR register; it must not be call volatile, or we would end up freeing it when setting up the call below. */
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));

    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);

    /*
     * Load the MXCSR register into the first argument and mask out the current exception flags.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);

    /*
     * The updated MXCSR is in the return register.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);

#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
#endif
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);

    return off;
}
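/*
 * Helper-call convention implied by the worker above: the hidden first argument
 * (IEMNATIVE_CALL_ARG0_GREG) receives the guest MXCSR with the exception flags
 * (X86_MXCSR_XCPT_FLAGS) masked off, and the helper returns the updated MXCSR in
 * IEMNATIVE_CALL_RET_GREG, which is then written back to the guest register.
 * As a rough, purely illustrative sketch (the worker name and exact pointer types
 * below are examples, not taken from the real IEM declarations):
 *
 *     uint32_t iemAImpl_exampleWorker_u128(uint32_t fMxCsrIn, PX86XMMREG puDst, PCX86XMMREG puSrc);
 */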


#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}
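/*
 * The SSE wrappers above and the AVX wrappers in the next section all funnel into
 * iemNativeEmitCallSseAvxAImplCommon(); they differ only in the hidden-argument
 * constant (IEM_SSE_AIMPL_HIDDEN_ARGS vs IEM_AVX_AIMPL_HIDDEN_ARGS) used when
 * asserting the argument variable indices.  A call site in an MC block passes
 * argument variables declared with the IEM_MC_ARG family, e.g. (worker and variable
 * names illustrative only):
 *
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_exampleWorker_u128, puDst, puSrc);
 */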


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}
#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */


/*********************************************************************************************************************************
*   Include instruction emitters.                                                                                                *
*********************************************************************************************************************************/
#include "target-x86/IEMAllN8veEmit-x86.h"
