VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104354

Last change on this file since 104354 was 104333, checked in by vboxsync, 10 months ago

VMM/IEM: Fixed untested IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE emitter. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 429.6 KB
1/* $Id: IEMAllN8veRecompFuncs.h 104333 2024-04-13 00:21:14Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
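/*
 * Annotation (not part of the upstream file): a rough, illustrative sketch of
 * how a recompiled MC block built from the two wrappers above expands; the
 * flag arguments and the statements in the middle are placeholders:
 *
 *      IEM_MC_BEGIN_EX(fMcFlags, fCImplFlags, cArgsIncludingHidden);  // opens the block, asserts clean variable/stack state, records the flags
 *          ... IEM_MC_XXX statements, each advancing 'off' as it emits native code ...
 *      IEM_MC_END();                                                  // frees all variables, closes the block and returns 'off'
 */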
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
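/*
 * Annotation (not part of the upstream file): illustrative shape of a user of
 * the IEM_MC_NATIVE_XXX wrappers above; the emitter name and variable indexes
 * are placeholders, not real symbols:
 *
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          ... portable IEM_MC_XXX fallback ...
 *      IEM_MC_NATIVE_ENDIF();
 */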
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
244* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
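/*
 * Annotation (not part of the upstream file): the iemNativeEmitCImplCall1/2/3
 * wrappers above all funnel into iemNativeEmitCImplCall() with the argument
 * count and up to three argument values; unused argument slots are simply
 * passed as zero.
 */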
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it probably will drop into the
310 * debugger, so it's not worth the effort of trying to service it here and we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt the
314 * immediate in the AND operation), we always update the flags and skip the
315 * conditional jump associated with the extra check.
316 */
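    /*
     * Annotation (not part of the upstream file): in effect, the native code
     * emitted below is roughly equivalent to:
     *
     *      if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
     *          goto ReturnWithFlags;                           // exits the TB with a special status
     *      eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
     *      CPUMCTX.eflags = eflags;
     */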
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
346
347/** Finishes the instruction with the given status code; a no-op dummy for VINF_SUCCESS. */
348template<int const a_rcNormal>
349DECL_FORCE_INLINE(uint32_t)
350iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
351{
352 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
353 if (a_rcNormal != VINF_SUCCESS)
354 {
355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
357#else
358 RT_NOREF_PV(idxInstr);
359#endif
360
361 /* As this code returns from the TB any pending register writes must be flushed. */
362 off = iemNativeRegFlushPendingWrites(pReNative, off);
363
364 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
365 }
366 return off;
367}
368
369
370#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
371 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
373
374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
375 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
377 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
378
379/** Same as iemRegAddToRip64AndFinishingNoFlags. */
380DECL_INLINE_THROW(uint32_t)
381iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
382{
383#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
384# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
385 if (!pReNative->Core.offPc)
386 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
387# endif
388
389 /* Allocate a temporary PC register. */
390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
391
392 /* Perform the addition and store the result. */
393 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
394 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395
396 /* Free but don't flush the PC register. */
397 iemNativeRegFreeTmp(pReNative, idxPcReg);
398#endif
399
400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
401 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
402
403 pReNative->Core.offPc += cbInstr;
404# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
405 off = iemNativePcAdjustCheck(pReNative, off);
406# endif
407 if (pReNative->cCondDepth)
408 off = iemNativeEmitPcWriteback(pReNative, off);
409 else
410 pReNative->Core.cInstrPcUpdateSkipped++;
411#endif
412
413 return off;
414}
415
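/*
 * Annotation (not part of the upstream file): summary of the two build modes
 * in the RIP/EIP/IP advance emitters.  Without IEMNATIVE_WITH_DELAYED_PC_UPDATING
 * each instruction emits "pc += cbInstr" and stores the result back to
 * CPUMCTX.rip immediately.  With it, the increment is merely accumulated in
 * pReNative->Core.offPc and the store is deferred (emitted by
 * iemNativeEmitPcWriteback when inside a conditional, otherwise counted in
 * cInstrPcUpdateSkipped and flushed later), saving one store per instruction.
 * The EIP32 and IP16 variants below follow the same pattern.
 */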
416
417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
418 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
419 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
420
421#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
422 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
424 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
425
426/** Same as iemRegAddToEip32AndFinishingNoFlags. */
427DECL_INLINE_THROW(uint32_t)
428iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
429{
430#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
431# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
432 if (!pReNative->Core.offPc)
433 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
434# endif
435
436 /* Allocate a temporary PC register. */
437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
438
439 /* Perform the addition and store the result. */
440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
442
443 /* Free but don't flush the PC register. */
444 iemNativeRegFreeTmp(pReNative, idxPcReg);
445#endif
446
447#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
448 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
449
450 pReNative->Core.offPc += cbInstr;
451# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
452 off = iemNativePcAdjustCheck(pReNative, off);
453# endif
454 if (pReNative->cCondDepth)
455 off = iemNativeEmitPcWriteback(pReNative, off);
456 else
457 pReNative->Core.cInstrPcUpdateSkipped++;
458#endif
459
460 return off;
461}
462
463
464#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
465 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
466 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
467
468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
469 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
470 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
471 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
472
473/** Same as iemRegAddToIp16AndFinishingNoFlags. */
474DECL_INLINE_THROW(uint32_t)
475iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
476{
477#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
478# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
479 if (!pReNative->Core.offPc)
480 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
481# endif
482
483 /* Allocate a temporary PC register. */
484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
485
486 /* Perform the addition and store the result. */
487 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
488 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
489 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
490
491 /* Free but don't flush the PC register. */
492 iemNativeRegFreeTmp(pReNative, idxPcReg);
493#endif
494
495#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
496 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
497
498 pReNative->Core.offPc += cbInstr;
499# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
500 off = iemNativePcAdjustCheck(pReNative, off);
501# endif
502 if (pReNative->cCondDepth)
503 off = iemNativeEmitPcWriteback(pReNative, off);
504 else
505 pReNative->Core.cInstrPcUpdateSkipped++;
506#endif
507
508 return off;
509}
510
511
512
513/*********************************************************************************************************************************
514* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
515*********************************************************************************************************************************/
516
517#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
518 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
519 (a_enmEffOpSize), pCallEntry->idxInstr); \
520 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
521
522#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
523 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
524 (a_enmEffOpSize), pCallEntry->idxInstr); \
525 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
526 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
527
528#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
529 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
530 IEMMODE_16BIT, pCallEntry->idxInstr); \
531 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
532
533#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
534 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
535 IEMMODE_16BIT, pCallEntry->idxInstr); \
536 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
537 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
538
539#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
540 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
541 IEMMODE_64BIT, pCallEntry->idxInstr); \
542 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
543
544#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
545 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
546 IEMMODE_64BIT, pCallEntry->idxInstr); \
547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
548 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
549
550/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
551 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
552 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
553DECL_INLINE_THROW(uint32_t)
554iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
555 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
556{
557 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
558
559 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
560 off = iemNativeRegFlushPendingWrites(pReNative, off);
561
562#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
563 Assert(pReNative->Core.offPc == 0);
564
565 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
566#endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition. */
572 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
573
574 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
575 {
576 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
578 }
579 else
580 {
581 /* Just truncate the result to 16-bit IP. */
582 Assert(enmEffOpSize == IEMMODE_16BIT);
583 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
584 }
585 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
586
587 /* Free but don't flush the PC register. */
588 iemNativeRegFreeTmp(pReNative, idxPcReg);
589
590 return off;
591}
592
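/*
 * Annotation (not part of the upstream file): the sequence emitted above for
 * a 64-bit relative jump is roughly:
 *
 *      uint64_t uNewRip = rip + cbInstr + offDisp;
 *      if (enmEffOpSize == IEMMODE_64BIT)
 *      {
 *          if (!IEM_IS_CANONICAL(uNewRip))
 *              goto RaiseGp0;                      // #GP(0) and exit the TB
 *      }
 *      else
 *          uNewRip &= UINT16_MAX;                  // 16-bit operand size: truncate to IP
 *      CPUMCTX.rip = uNewRip;
 */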
593
594#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
595 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
596 (a_enmEffOpSize), pCallEntry->idxInstr); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
598
599#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
600 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
601 (a_enmEffOpSize), pCallEntry->idxInstr); \
602 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
603 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
604
605#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
606 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
607 IEMMODE_16BIT, pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
609
610#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
611 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
612 IEMMODE_16BIT, pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
615
616#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
618 IEMMODE_32BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
620
621#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
623 IEMMODE_32BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
626
627/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
628 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
629 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
630DECL_INLINE_THROW(uint32_t)
631iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
632 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
633{
634 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
635
636 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
637 off = iemNativeRegFlushPendingWrites(pReNative, off);
638
639#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
640 Assert(pReNative->Core.offPc == 0);
641
642 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
643#endif
644
645 /* Allocate a temporary PC register. */
646 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
647
648 /* Perform the addition. */
649 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
650
651 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
652 if (enmEffOpSize == IEMMODE_16BIT)
653 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
654
655 /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
656/** @todo we can skip this in 32-bit FLAT mode. */
657 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
658
659 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
660
661 /* Free but don't flush the PC register. */
662 iemNativeRegFreeTmp(pReNative, idxPcReg);
663
664 return off;
665}
666
667
668#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
669 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
671
672#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
673 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
674 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
675 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
676
677#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
678 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
680
681#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
682 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
683 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
684 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
685
686#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
687 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
689
690#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
691 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
692 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
693 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
694
695/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
696DECL_INLINE_THROW(uint32_t)
697iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
698 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
699{
700 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
701 off = iemNativeRegFlushPendingWrites(pReNative, off);
702
703#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
704 Assert(pReNative->Core.offPc == 0);
705
706 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
707#endif
708
709 /* Allocate a temporary PC register. */
710 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
711
712 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
713 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
714 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
715 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
716 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
717
718 /* Free but don't flush the PC register. */
719 iemNativeRegFreeTmp(pReNative, idxPcReg);
720
721 return off;
722}
723
724
725
726/*********************************************************************************************************************************
727* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
728*********************************************************************************************************************************/
729
730/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
731#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
732 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
733
734/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
735#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
736 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
737
738/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
739#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
740 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
741
742/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
743 * clears flags. */
744#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
745 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
746 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
747
748/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
749 * clears flags. */
750#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
751 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
752 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
753
754/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
755 * clears flags. */
756#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
757 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
758 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
759
760#undef IEM_MC_SET_RIP_U16_AND_FINISH
761
762
763/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
764#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
765 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
766
767/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
768#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
769 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
770
771/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
772 * clears flags. */
773#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
774 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
775 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
776
777/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
778 * and clears flags. */
779#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
780 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
781 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
782
783#undef IEM_MC_SET_RIP_U32_AND_FINISH
784
785
786/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
787#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
788 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
789
790/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
791 * and clears flags. */
792#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
793 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
794 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
795
796#undef IEM_MC_SET_RIP_U64_AND_FINISH
797
798
799/** Same as iemRegRipJumpU16AndFinishNoFlags,
800 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
801DECL_INLINE_THROW(uint32_t)
802iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
803 uint8_t idxInstr, uint8_t cbVar)
804{
805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
807
808 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
809 off = iemNativeRegFlushPendingWrites(pReNative, off);
810
811#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
812 Assert(pReNative->Core.offPc == 0);
813
814 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
815#endif
816
817 /* Get a register with the new PC loaded from idxVarPc.
818 Note! This ASSUMES that the high bits of the GPR are zeroed. */
819 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
820
821 /* Check limit (may #GP(0) + exit TB). */
822 if (!f64Bit)
823/** @todo we can skip this test in FLAT 32-bit mode. */
824 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
825 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
826 else if (cbVar > sizeof(uint32_t))
827 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
828
829 /* Store the result. */
830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
831
832 iemNativeVarRegisterRelease(pReNative, idxVarPc);
833 /** @todo implicitly free the variable? */
834
835 return off;
836}
837
838
839
840/*********************************************************************************************************************************
841* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
842*********************************************************************************************************************************/
843
844#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
845 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
846
847/**
848 * Emits code to check if a \#NM exception should be raised.
849 *
850 * @returns New code buffer offset, UINT32_MAX on failure.
851 * @param pReNative The native recompile state.
852 * @param off The code buffer offset.
853 * @param idxInstr The current instruction.
854 */
855DECL_INLINE_THROW(uint32_t)
856iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
857{
858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
859 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
860
861 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
862 {
863#endif
864 /*
865 * Make sure we don't have any outstanding guest register writes as we may
866 * raise an #NM and all guest registers must be up to date in CPUMCTX.
867 */
868 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
869 off = iemNativeRegFlushPendingWrites(pReNative, off);
870
871#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
872 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
873#else
874 RT_NOREF(idxInstr);
875#endif
876
877 /* Allocate a temporary CR0 register. */
878 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
879 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
880
881 /*
882 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
883 * return raisexcpt();
884 */
885 /* Test and jump. */
886 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
887
888 /* Free but don't flush the CR0 register. */
889 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
890
891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
893 }
894 else
895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
896#endif
897
898 return off;
899}
900
901
902#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
903 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
904
905/**
906 * Emits code to check if a \#NM exception should be raised for WAIT/FWAIT.
907 *
908 * @returns New code buffer offset, UINT32_MAX on failure.
909 * @param pReNative The native recompile state.
910 * @param off The code buffer offset.
911 * @param idxInstr The current instruction.
912 */
913DECL_INLINE_THROW(uint32_t)
914iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
915{
916#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
917 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
918
919 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
920 {
921#endif
922 /*
923 * Make sure we don't have any outstanding guest register writes as we may
924 * raise an #NM and all guest registers must be up to date in CPUMCTX.
925 */
926 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
927 off = iemNativeRegFlushPendingWrites(pReNative, off);
928
929#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
930 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
931#else
932 RT_NOREF(idxInstr);
933#endif
934
935 /* Allocate a temporary CR0 register. */
936 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_Calculation);
937
938 /*
939 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
940 * return raisexcpt();
941 */
942 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
943 /* Test and jump. */
944 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS, kIemNativeLabelType_RaiseNm);
945
946 /* Free the CR0 register. */
947 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
948
949#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
950 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
951 }
952 else
953 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
954#endif
955
956 return off;
957}
958
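/*
 * Annotation (not part of the upstream file): note the difference between the
 * two #NM checks above.  IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE raises when
 * either CR0.EM or CR0.TS is set, whereas the WAIT variant only raises when
 * both CR0.MP and CR0.TS are set, matching the WAIT/FWAIT semantics.
 */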
959
960#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
961 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
962
963/**
964 * Emits code to check if a \#MF exception should be raised.
965 *
966 * @returns New code buffer offset, UINT32_MAX on failure.
967 * @param pReNative The native recompile state.
968 * @param off The code buffer offset.
969 * @param idxInstr The current instruction.
970 */
971DECL_INLINE_THROW(uint32_t)
972iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
973{
974 /*
975 * Make sure we don't have any outstanding guest register writes as we may
976 * raise an #MF and all guest registers must be up to date in CPUMCTX.
977 */
978 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
979 off = iemNativeRegFlushPendingWrites(pReNative, off);
980
981#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
982 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
983#else
984 RT_NOREF(idxInstr);
985#endif
986
987 /* Allocate a temporary FSW register. */
988 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
989 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
990
991 /*
992 * if ((FSW & X86_FSW_ES) != 0)
993 * return raisexcpt();
994 */
995 /* Test and jump. */
996 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, idxLabelRaiseMf);
997
998 /* Free but don't flush the FSW register. */
999 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
1000
1001 return off;
1002}
1003
1004
1005#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
1006 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1007
1008/**
1009 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
1010 *
1011 * @returns New code buffer offset, UINT32_MAX on failure.
1012 * @param pReNative The native recompile state.
1013 * @param off The code buffer offset.
1014 * @param idxInstr The current instruction.
1015 */
1016DECL_INLINE_THROW(uint32_t)
1017iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1018{
1019#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1020 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
1021
1022 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
1023 {
1024#endif
1025 /*
1026 * Make sure we don't have any outstanding guest register writes as we may
1027 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1028 */
1029 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1030 off = iemNativeRegFlushPendingWrites(pReNative, off);
1031
1032#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1033 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1034#else
1035 RT_NOREF(idxInstr);
1036#endif
1037
1038 /* Allocate a temporary CR0 and CR4 register. */
1039 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
1040 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1041 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1042 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1043
1044 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
1045#ifdef RT_ARCH_AMD64
1046 /*
1047 * We do a modified test here:
1048 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
1049 * else { goto RaiseSseRelated; }
1050 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
1051 * all targets except the 386, and since the 386 doesn't support SSE
1052 * anyway, this should be a safe assumption.
1053 */
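        /*
         * Annotation (not part of the upstream file): spelled out, the test
         * leaves zero in idxTmpReg only when CR4.OSFXSR=1 while CR0.EM=0 and
         * CR0.TS=0; any other combination leaves a bit set after the XOR and
         * the 'jne' below takes the RaiseSseRelated path.
         */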
1054 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
1055 //pCodeBuf[off++] = 0xcc;
1056 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
1057 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
1058 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
1059 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
1060 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
1061 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
1062
1063#elif defined(RT_ARCH_ARM64)
1064 /*
1065 * We do a modified test here:
1066 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
1067 * else { goto RaiseSseRelated; }
1068 */
1069 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
1070 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1071 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
1072 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
1073 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
1074 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1075 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1076 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
1077 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1078 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1079 idxLabelRaiseSseRelated);
1080
1081#else
1082# error "Port me!"
1083#endif
1084
1085 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1086 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1087 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1088 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1089
1090#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1091 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1092 }
1093 else
1094 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1095#endif
1096
1097 return off;
1098}
1099
1100
1101#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1102 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1103
1104/**
1105 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1106 *
1107 * @returns New code buffer offset, UINT32_MAX on failure.
1108 * @param pReNative The native recompile state.
1109 * @param off The code buffer offset.
1110 * @param idxInstr The current instruction.
1111 */
1112DECL_INLINE_THROW(uint32_t)
1113iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1114{
1115#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1116 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1117
1118 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1119 {
1120#endif
1121 /*
1122 * Make sure we don't have any outstanding guest register writes as we may
1123 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1124 */
1125 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1126 off = iemNativeRegFlushPendingWrites(pReNative, off);
1127
1128#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1129 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1130#else
1131 RT_NOREF(idxInstr);
1132#endif
1133
1134 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1135 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1136 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1137 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1138 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1139 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1140
1141 /*
1142 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1143 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1144 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1145 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1146 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1147 * { likely }
1148 * else { goto RaiseAvxRelated; }
1149 */
1150#ifdef RT_ARCH_AMD64
1151 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1152 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
1153 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1154 ^ 0x1a) ) { likely }
1155 else { goto RaiseAvxRelated; } */
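        /*
         * Annotation (not part of the upstream file): after the two rotate-
         * through-carry steps below, idxTmpReg holds
         * (YMM << 4) | (SSE << 3) | (OSXSAVE << 1) | TS, and the XOR constant
         * 0x1a == ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2 is that layout with
         * YMM=SSE=OSXSAVE=1 and TS=0.  The XOR therefore yields zero exactly
         * when AVX/SSE state saving is fully enabled and the task-switched
         * flag is clear; anything else jumps to RaiseAvxRelated.
         */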
1156 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1157 //pCodeBuf[off++] = 0xcc;
1158 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1159 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1160 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1161 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1162 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1163 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1164 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1165 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1166 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1167 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1168 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1169
1170#elif defined(RT_ARCH_ARM64)
1171 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
1172 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1173 else { goto RaiseAvxRelated; } */
1174 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1175 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1176 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1177 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1178 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1179 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1180 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1181 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1182 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1183 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1184 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1185 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
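 /* As on AMD64: the register is zero exactly when TS is clear and OSXSAVE/SSE/YMM are all set,
    so the not-zero test below only branches to RaiseAvxRelated in the unlikely case. */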
1186 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1187 idxLabelRaiseAvxRelated);
1188
1189#else
1190# error "Port me!"
1191#endif
1192
1193 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1194 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1195 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1196 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1197#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1198 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1199 }
1200 else
1201 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1202#endif
1203
1204 return off;
1205}
1206
1207
1208#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1209#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1210 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
1211
1212/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1213DECL_INLINE_THROW(uint32_t)
1214iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1215{
1216 /*
1217 * Make sure we don't have any outstanding guest register writes as we may
1218 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1219 */
1220 off = iemNativeRegFlushPendingWrites(pReNative, off);
1221
1222#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1223 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1224#else
1225 RT_NOREF(idxInstr);
1226#endif
1227
1228 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1229 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1230 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
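 /* The sequence below computes mxcsr & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT),
    i.e. it aligns the exception mask bits with the exception flag bits and keeps only the pending
    flags that are unmasked; if any X86_MXCSR_XCPT_FLAGS bit survives, the \#XF/\#UD path
    (RaiseSseAvxFpRelated) is taken. */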
1231
1232 /* mov tmp, varmxcsr */
1233 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1234 /* tmp &= X86_MXCSR_XCPT_MASK */
1235 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1236 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1237 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1238 /* tmp = ~tmp */
1239 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1240 /* tmp &= mxcsr */
1241 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1242 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1243 idxLabelRaiseSseAvxFpRelated);
1244
1245 /* Free but don't flush the MXCSR register. */
1246 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1247 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1248
1249 return off;
1250}
1251#endif
1252
1253
1254#define IEM_MC_RAISE_DIVIDE_ERROR() \
1255 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1256
1257/**
1258 * Emits code to raise a \#DE.
1259 *
1260 * @returns New code buffer offset, UINT32_MAX on failure.
1261 * @param pReNative The native recompile state.
1262 * @param off The code buffer offset.
1263 * @param idxInstr The current instruction.
1264 */
1265DECL_INLINE_THROW(uint32_t)
1266iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1267{
1268 /*
1269 * Make sure we don't have any outstanding guest register writes as we may
 * raise a \#DE and all guest registers must be up to date in CPUMCTX.
1270 */
1271 off = iemNativeRegFlushPendingWrites(pReNative, off);
1272
1273#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1274 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1275#else
1276 RT_NOREF(idxInstr);
1277#endif
1278
1279 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1280
1281 /* raise \#DE exception unconditionally. */
1282 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1283
1284 return off;
1285}
1286
1287
1288#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
1289 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
1290
1291/**
1292 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
1293 *
1294 * @returns New code buffer offset, UINT32_MAX on failure.
1295 * @param pReNative The native recompile state.
1296 * @param off The code buffer offset.
1297 * @param idxInstr The current instruction.
1298 * @param idxVarEffAddr Index of the variable containing the effective address to check.
1299 * @param cbAlign The alignment in bytes to check against.
1300 */
1301DECL_INLINE_THROW(uint32_t)
1302iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
1303{
1304 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
1305 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
1306
1307 /*
1308 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
1309 */
1310 off = iemNativeRegFlushPendingWrites(pReNative, off);
1311
1312#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1313 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1314#else
1315 RT_NOREF(idxInstr);
1316#endif
1317
1318 uint8_t const idxLabelRaiseGp0 = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0);
1319 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
1320
1321 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxVarReg, cbAlign - 1, idxLabelRaiseGp0);
1322
1323 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
1324 return off;
1325}
1326
1327
1328/*********************************************************************************************************************************
1329* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1330*********************************************************************************************************************************/
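/* Overview: each IEM_MC_IF_XXX emitter below pushes an entry onto the condition stack and emits a
 * test that jumps to the 'else' label when the condition is false; IEM_MC_ELSE jumps to the 'endif'
 * label and defines the 'else' label; IEM_MC_ENDIF defines the remaining labels and reconciles the
 * register/variable state of the two branches. */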
1331
1332/**
1333 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1334 *
1335 * @returns Pointer to the new condition stack entry.
1336 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting gets too deep.
1337 */
1338DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1339{
1340#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1341 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1342#endif
1343
1344 uint32_t const idxStack = pReNative->cCondDepth;
1345 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1346
1347 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1348 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1349
1350 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1351 pEntry->fInElse = false;
1352 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1353 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1354
1355 return pEntry;
1356}
1357
1358
1359/**
1360 * Start of the if-block, snapshotting the register and variable state.
1361 */
1362DECL_INLINE_THROW(void)
1363iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1364{
1365 Assert(offIfBlock != UINT32_MAX);
1366 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1367 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1368 Assert(!pEntry->fInElse);
1369
1370 /* Define the start of the IF block if requested or for disassembly purposes. */
1371 if (idxLabelIf != UINT32_MAX)
1372 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1374 else
1375 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1376#else
1377 RT_NOREF(offIfBlock);
1378#endif
1379
1380#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1381 Assert(pReNative->Core.offPc == 0);
1382#endif
1383
1384 /* Copy the initial state so we can restore it in the 'else' block. */
1385 pEntry->InitialState = pReNative->Core;
1386}
1387
1388
1389#define IEM_MC_ELSE() } while (0); \
1390 off = iemNativeEmitElse(pReNative, off); \
1391 do {
1392
1393/** Emits code related to IEM_MC_ELSE. */
1394DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1395{
1396 /* Check sanity and get the conditional stack entry. */
1397 Assert(off != UINT32_MAX);
1398 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1399 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1400 Assert(!pEntry->fInElse);
1401
1402#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1403 /* Writeback any dirty shadow registers. */
1404 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1405 * in one of the branches and leave guest registers already dirty before the start of the if
1406 * block alone. */
1407 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1408#endif
1409
1410 /* Jump to the endif */
1411 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1412
1413 /* Define the else label and enter the else part of the condition. */
1414 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1415 pEntry->fInElse = true;
1416
1417#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1418 Assert(pReNative->Core.offPc == 0);
1419#endif
1420
1421 /* Snapshot the core state so we can do a merge at the endif and restore
1422 the snapshot we took at the start of the if-block. */
1423 pEntry->IfFinalState = pReNative->Core;
1424 pReNative->Core = pEntry->InitialState;
1425
1426 return off;
1427}
1428
1429
1430#define IEM_MC_ENDIF() } while (0); \
1431 off = iemNativeEmitEndIf(pReNative, off)
1432
1433/** Emits code related to IEM_MC_ENDIF. */
1434DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1435{
1436 /* Check sanity and get the conditional stack entry. */
1437 Assert(off != UINT32_MAX);
1438 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1439 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1440
1441#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1442 Assert(pReNative->Core.offPc == 0);
1443#endif
1444#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1445 /* Writeback any dirty shadow registers (else branch). */
1446 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1447 * in one of the branches and leave guest registers already dirty before the start of the if
1448 * block alone. */
1449 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1450#endif
1451
1452 /*
1453 * Now we have to find common ground with the core state at the end of the other
1454 * branch (the if-block, or the initial state when there is no else). Use the
1455 * smallest common denominator and just drop anything that isn't the same in both states.
1456 */
1457 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1458 * which is why we're doing this at the end of the else-block.
1459 * But we'd need more info about the future for that to be worth the effort. */
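 /* Concretely: guest register shadowings that differ between the two states are dropped, and
    variables whose register assignment cannot be reconciled are released; only what is identical
    in both states survives the join. */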
1460 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1461#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1462 Assert( pOther->bmGstRegShadowDirty == 0
1463 && pReNative->Core.bmGstRegShadowDirty == 0);
1464#endif
1465
1466 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1467 {
1468 /* shadow guest stuff first. */
1469 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1470 if (fGstRegs)
1471 {
1472 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1473 do
1474 {
1475 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1476 fGstRegs &= ~RT_BIT_64(idxGstReg);
1477
1478 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1479 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1480 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1481 {
1482 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1483 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1484
1485#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1486 /* Writeback any dirty shadow registers we are about to unshadow. */
1487 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1488#endif
1489 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1490 }
1491 } while (fGstRegs);
1492 }
1493 else
1494 {
1495 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1496#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1497 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1498#endif
1499 }
1500
1501 /* Check variables next. For now we require them to be identical
1502 or something we can recreate. */
1503 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1504 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1505 if (fVars)
1506 {
1507 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1508 do
1509 {
1510 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1511 fVars &= ~RT_BIT_32(idxVar);
1512
1513 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1514 {
1515 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1516 continue;
1517 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1518 {
1519 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1520 if (idxHstReg != UINT8_MAX)
1521 {
1522 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1523 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1524 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1525 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1526 }
1527 continue;
1528 }
1529 }
1530 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1531 continue;
1532
1533 /* Irreconcilable, so drop it. */
1534 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1535 if (idxHstReg != UINT8_MAX)
1536 {
1537 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1538 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1539 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1540 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1541 }
1542 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1543 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1544 } while (fVars);
1545 }
1546
1547 /* Finally, check that the host register allocations matches. */
1548 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1549 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1550 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1551 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1552 }
1553
1554 /*
1555 * Define the endif label and maybe the else one if we're still in the 'if' part.
1556 */
1557 if (!pEntry->fInElse)
1558 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1559 else
1560 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1561 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1562
1563 /* Pop the conditional stack. */
1564 pReNative->cCondDepth -= 1;
1565
1566 return off;
1567}
1568
1569
1570#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1571 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1572 do {
1573
1574/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1575DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1576{
1577 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1578 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1579
1580 /* Get the eflags. */
1581 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1582 kIemNativeGstRegUse_ReadOnly);
1583
1584 /* Test and jump. */
1585 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1586
1587 /* Free but don't flush the EFlags register. */
1588 iemNativeRegFreeTmp(pReNative, idxEflReg);
1589
1590 /* Make a copy of the core state now as we start the if-block. */
1591 iemNativeCondStartIfBlock(pReNative, off);
1592
1593 return off;
1594}
1595
1596
1597#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1598 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1599 do {
1600
1601/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1602DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1603{
1604 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1605 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1606
1607 /* Get the eflags. */
1608 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1609 kIemNativeGstRegUse_ReadOnly);
1610
1611 /* Test and jump. */
1612 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1613
1614 /* Free but don't flush the EFlags register. */
1615 iemNativeRegFreeTmp(pReNative, idxEflReg);
1616
1617 /* Make a copy of the core state now as we start the if-block. */
1618 iemNativeCondStartIfBlock(pReNative, off);
1619
1620 return off;
1621}
1622
1623
1624#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1625 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1626 do {
1627
1628/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1629DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1630{
1631 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1632 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1633
1634 /* Get the eflags. */
1635 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1636 kIemNativeGstRegUse_ReadOnly);
1637
1638 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1639 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1640
1641 /* Test and jump. */
1642 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1643
1644 /* Free but don't flush the EFlags register. */
1645 iemNativeRegFreeTmp(pReNative, idxEflReg);
1646
1647 /* Make a copy of the core state now as we start the if-block. */
1648 iemNativeCondStartIfBlock(pReNative, off);
1649
1650 return off;
1651}
1652
1653
1654#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1655 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1656 do {
1657
1658/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1659DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1660{
1661 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1662 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1663
1664 /* Get the eflags. */
1665 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1666 kIemNativeGstRegUse_ReadOnly);
1667
1668 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1669 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1670
1671 /* Test and jump. */
1672 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1673
1674 /* Free but don't flush the EFlags register. */
1675 iemNativeRegFreeTmp(pReNative, idxEflReg);
1676
1677 /* Make a copy of the core state now as we start the if-block. */
1678 iemNativeCondStartIfBlock(pReNative, off);
1679
1680 return off;
1681}
1682
1683
1684#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1685 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1686 do {
1687
1688#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1689 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1690 do {
1691
1692/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1693DECL_INLINE_THROW(uint32_t)
1694iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1695 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1696{
1697 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1698 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1699
1700 /* Get the eflags. */
1701 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1702 kIemNativeGstRegUse_ReadOnly);
1703
1704 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1705 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1706
1707 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1708 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1709 Assert(iBitNo1 != iBitNo2);
1710
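 /* Strategy: isolate a_fBit1, shift it into a_fBit2's bit position and XOR the result with EFLAGS;
    bit iBitNo2 of idxTmpReg then holds (bit1 ^ bit2). E.g. (hypothetical values for illustration)
    with a_fBit1=X86_EFL_SF (bit 7) and a_fBit2=X86_EFL_OF (bit 11) the SF bit is shifted left by 4
    onto OF, so bit 11 ends up as SF^OF. */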
1711#ifdef RT_ARCH_AMD64
1712 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1713
1714 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1715 if (iBitNo1 > iBitNo2)
1716 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1717 else
1718 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1719 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1723 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1724
1725 /* and tmpreg, eflreg, #1<<iBitNo1 */
1726 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1727
1728 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1729 if (iBitNo1 > iBitNo2)
1730 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1731 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1732 else
1733 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1734 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1735
1736 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1737
1738#else
1739# error "Port me"
1740#endif
1741
1742 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1743 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1744 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1745
1746 /* Free but don't flush the EFlags and tmp registers. */
1747 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1748 iemNativeRegFreeTmp(pReNative, idxEflReg);
1749
1750 /* Make a copy of the core state now as we start the if-block. */
1751 iemNativeCondStartIfBlock(pReNative, off);
1752
1753 return off;
1754}
1755
1756
1757#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1758 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1759 do {
1760
1761#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1762 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1763 do {
1764
1765/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1766 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1767DECL_INLINE_THROW(uint32_t)
1768iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1769 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1770{
1771 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1772 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1773
1774 /* We need an if-block label for the inverted variant (it short-circuits to the if-block when the lone bit is set). */
1775 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1776 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1777
1778 /* Get the eflags. */
1779 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1780 kIemNativeGstRegUse_ReadOnly);
1781
1782 /* Translate the flag masks to bit numbers. */
1783 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1784 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1785
1786 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1787 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1788 Assert(iBitNo1 != iBitNo);
1789
1790 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1791 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1792 Assert(iBitNo2 != iBitNo);
1793 Assert(iBitNo2 != iBitNo1);
1794
1795#ifdef RT_ARCH_AMD64
1796 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1797#elif defined(RT_ARCH_ARM64)
1798 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1799#endif
1800
1801 /* Check for the lone bit first. */
1802 if (!fInverted)
1803 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1804 else
1805 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1806
1807 /* Then extract and compare the other two bits. */
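 /* Same isolate-shift-XOR trick as in iemNativeEmitIfEflagsTwoBitsEqual() above:
    bit iBitNo2 of idxTmpReg ends up as (bit1 ^ bit2). */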
1808#ifdef RT_ARCH_AMD64
1809 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1810 if (iBitNo1 > iBitNo2)
1811 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1812 else
1813 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1814 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1815
1816#elif defined(RT_ARCH_ARM64)
1817 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1818
1819 /* and tmpreg, eflreg, #1<<iBitNo1 */
1820 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1821
1822 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1823 if (iBitNo1 > iBitNo2)
1824 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1825 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1826 else
1827 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1828 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1829
1830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1831
1832#else
1833# error "Port me"
1834#endif
1835
1836 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1837 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1838 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1839
1840 /* Free but don't flush the EFlags and tmp registers. */
1841 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1842 iemNativeRegFreeTmp(pReNative, idxEflReg);
1843
1844 /* Make a copy of the core state now as we start the if-block. */
1845 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1846
1847 return off;
1848}
1849
1850
1851#define IEM_MC_IF_CX_IS_NZ() \
1852 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1853 do {
1854
1855/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1856DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1857{
1858 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1859
1860 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1861 kIemNativeGstRegUse_ReadOnly);
1862 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1863 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1864
1865 iemNativeCondStartIfBlock(pReNative, off);
1866 return off;
1867}
1868
1869
1870#define IEM_MC_IF_ECX_IS_NZ() \
1871 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1872 do {
1873
1874#define IEM_MC_IF_RCX_IS_NZ() \
1875 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1876 do {
1877
1878/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1879DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1880{
1881 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1882
1883 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1884 kIemNativeGstRegUse_ReadOnly);
1885 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1886 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1887
1888 iemNativeCondStartIfBlock(pReNative, off);
1889 return off;
1890}
1891
1892
1893#define IEM_MC_IF_CX_IS_NOT_ONE() \
1894 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1895 do {
1896
1897/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1898DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1899{
1900 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1901
1902 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1903 kIemNativeGstRegUse_ReadOnly);
1904#ifdef RT_ARCH_AMD64
1905 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1906#else
1907 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1908 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1909 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1910#endif
1911 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1912
1913 iemNativeCondStartIfBlock(pReNative, off);
1914 return off;
1915}
1916
1917
1918#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1919 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1920 do {
1921
1922#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1923 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1924 do {
1925
1926/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1927DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1928{
1929 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1930
1931 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1932 kIemNativeGstRegUse_ReadOnly);
1933 if (f64Bit)
1934 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1935 else
1936 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1937 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1938
1939 iemNativeCondStartIfBlock(pReNative, off);
1940 return off;
1941}
1942
1943
1944#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1945 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1946 do {
1947
1948#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1949 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1950 do {
1951
1952/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1953 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1954DECL_INLINE_THROW(uint32_t)
1955iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1956{
1957 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1958 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1959
1960 /* We have to load both RCX and EFLAGS before we can start branching,
1961 otherwise we'll end up in the else-block with an inconsistent
1962 register allocator state.
1963 Doing EFLAGS first as it's more likely to be loaded, right? */
1964 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1965 kIemNativeGstRegUse_ReadOnly);
1966 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1967 kIemNativeGstRegUse_ReadOnly);
1968
1969 /** @todo we could reduce this to a single branch instruction by spending a
1970 * temporary register and some setnz stuff. Not sure if loops are
1971 * worth it. */
1972 /* Check CX. */
1973#ifdef RT_ARCH_AMD64
1974 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1975#else
1976 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1977 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1978 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1979#endif
1980
1981 /* Check the EFlags bit. */
1982 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1983 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1984 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1985 !fCheckIfSet /*fJmpIfSet*/);
1986
1987 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1988 iemNativeRegFreeTmp(pReNative, idxEflReg);
1989
1990 iemNativeCondStartIfBlock(pReNative, off);
1991 return off;
1992}
1993
1994
1995#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1996 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1997 do {
1998
1999#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2000 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
2001 do {
2002
2003#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2004 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
2005 do {
2006
2007#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2008 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
2009 do {
2010
2011/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
2012 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
2013 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
2014 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2015DECL_INLINE_THROW(uint32_t)
2016iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2017 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
2018{
2019 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2020 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2021
2022 /* We have to load both RCX and EFLAGS before we can start branching,
2023 otherwise we'll end up in the else-block with an inconsistent
2024 register allocator state.
2025 Doing EFLAGS first as it's more likely to be loaded, right? */
2026 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2027 kIemNativeGstRegUse_ReadOnly);
2028 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2029 kIemNativeGstRegUse_ReadOnly);
2030
2031 /** @todo we could reduce this to a single branch instruction by spending a
2032 * temporary register and some setnz stuff. Not sure if loops are
2033 * worth it. */
2034 /* Check RCX/ECX. */
2035 if (f64Bit)
2036 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2037 else
2038 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2039
2040 /* Check the EFlags bit. */
2041 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2042 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2043 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
2044 !fCheckIfSet /*fJmpIfSet*/);
2045
2046 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2047 iemNativeRegFreeTmp(pReNative, idxEflReg);
2048
2049 iemNativeCondStartIfBlock(pReNative, off);
2050 return off;
2051}
2052
2053
2054#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
2055 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
2056 do {
2057
2058/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
2059DECL_INLINE_THROW(uint32_t)
2060iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
2061{
2062 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2063
2064 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
2065 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
2066 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2067 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2068
2069 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
2070
2071 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
2072
2073 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
2074
2075 iemNativeCondStartIfBlock(pReNative, off);
2076 return off;
2077}
2078
2079
2080#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
2081 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
2082 do {
2083
2084/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
2085DECL_INLINE_THROW(uint32_t)
2086iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
2087{
2088 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2089 Assert(iGReg < 16);
2090
2091 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2092 kIemNativeGstRegUse_ReadOnly);
2093
2094 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2095
2096 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2097
2098 iemNativeCondStartIfBlock(pReNative, off);
2099 return off;
2100}
2101
2102
2103
2104/*********************************************************************************************************************************
2105* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2106*********************************************************************************************************************************/
2107
2108#define IEM_MC_NOREF(a_Name) \
2109 RT_NOREF_PV(a_Name)
2110
2111#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2112 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2113
2114#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2115 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2116
2117#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2118 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2119
2120#define IEM_MC_LOCAL(a_Type, a_Name) \
2121 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2122
2123#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2124 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2125
2126#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2127 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
2128
2129
2130/**
2131 * Sets the host register for @a idxVarRc to @a idxReg.
2132 *
2133 * The register must not be allocated. Any guest register shadowing will be
2134 * implicitly dropped by this call.
2135 *
2136 * The variable must not have any register associated with it (causes
2137 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2138 * implied.
2139 *
2140 * @returns idxReg
2141 * @param pReNative The recompiler state.
2142 * @param idxVar The variable.
2143 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2144 * @param off For recording in debug info.
2145 *
2146 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
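 * @note Typically used right after emitting a helper call, to bind the return value register
 * (IEMNATIVE_CALL_RET_GREG) to the result variable; see the use in iemNativeEmitCallAImplCommon() below.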
2147 */
2148DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2149{
2150 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2151 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2152 Assert(!pVar->fRegAcquired);
2153 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2154 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2155 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2156
2157 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2158 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2159
2160 iemNativeVarSetKindToStack(pReNative, idxVar);
2161 pVar->idxReg = idxReg;
2162
2163 return idxReg;
2164}
2165
2166
2167/**
2168 * A convenient helper function.
2169 */
2170DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2171 uint8_t idxReg, uint32_t *poff)
2172{
2173 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2174 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2175 return idxReg;
2176}
2177
2178
2179/**
2180 * This is called by IEM_MC_END() to clean up all variables.
2181 */
2182DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2183{
2184 uint32_t const bmVars = pReNative->Core.bmVars;
2185 if (bmVars != 0)
2186 iemNativeVarFreeAllSlow(pReNative, bmVars);
2187 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2188 Assert(pReNative->Core.bmStack == 0);
2189}
2190
2191
2192#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2193
2194/**
2195 * This is called by IEM_MC_FREE_LOCAL.
2196 */
2197DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2198{
2199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2200 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2201 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2202}
2203
2204
2205#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2206
2207/**
2208 * This is called by IEM_MC_FREE_ARG.
2209 */
2210DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2211{
2212 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2213 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2214 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2215}
2216
2217
2218#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2219
2220/**
2221 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2222 */
2223DECL_INLINE_THROW(uint32_t)
2224iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2225{
2226 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2227 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2228 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2229 Assert( pVarDst->cbVar == sizeof(uint16_t)
2230 || pVarDst->cbVar == sizeof(uint32_t));
2231
2232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2233 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2234 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2235 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2236 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2237
2238 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2239
2240 /*
2241 * Special case for immediates.
2242 */
2243 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2244 {
2245 switch (pVarDst->cbVar)
2246 {
2247 case sizeof(uint16_t):
2248 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2249 break;
2250 case sizeof(uint32_t):
2251 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2252 break;
2253 default: AssertFailed(); break;
2254 }
2255 }
2256 else
2257 {
2258 /*
2259 * The generic solution for now.
2260 */
2261 /** @todo optimize this by having the python script make sure the source
2262 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2263 * statement. Then we could just transfer the register assignments. */
2264 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2265 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2266 switch (pVarDst->cbVar)
2267 {
2268 case sizeof(uint16_t):
2269 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2270 break;
2271 case sizeof(uint32_t):
2272 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2273 break;
2274 default: AssertFailed(); break;
2275 }
2276 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2277 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2278 }
2279 return off;
2280}
2281
2282
2283
2284/*********************************************************************************************************************************
2285* Emitters for IEM_MC_CALL_CIMPL_XXX *
2286*********************************************************************************************************************************/
2287
2288/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2289DECL_INLINE_THROW(uint32_t)
2290iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2291 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2292
2293{
2294 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2295
2296#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2297 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2298 when a call clobbers any of the relevant control registers. */
2299# if 1
2300 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2301 {
2302 /* Likely as long as call+ret are done via cimpl. */
2303 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2304 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2305 }
2306 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2307 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2308 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2309 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2310 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2311 else
2312 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2313 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2314 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2315
2316# else
2317 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2318 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2319 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2320 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2321 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2322 || pfnCImpl == (uintptr_t)iemCImpl_callf
2323 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2324 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2325 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2326 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2327 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2328# endif
2329#endif
2330
2331 /*
2332 * Do all the call setup and cleanup.
2333 */
2334 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2335
2336 /*
2337 * Load the two or three hidden arguments.
2338 */
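 /* On most hosts these are pVCpu (ARG0) and cbInstr (ARG1); on Windows/AMD64 with strict VBOXSTRICTRC
    the first argument is instead a pointer to a stack slot receiving the VBOXSTRICTRC, shifting
    pVCpu/cbInstr to ARG1/ARG2 (see the #if below). */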
2339#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2340 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2341 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2342 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2343#else
2344 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2345 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2346#endif
2347
2348 /*
2349 * Make the call and check the return code.
2350 *
2351 * Shadow PC copies are always flushed here, other stuff depends on flags.
2352 * Segment and general purpose registers are explicitly flushed via the
2353 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2354 * macros.
2355 */
2356 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2357#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2358 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2359#endif
2360 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2361 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2362 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2363 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2364
2365 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2366}
2367
2368
2369#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2370 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2371
2372/** Emits code for IEM_MC_CALL_CIMPL_1. */
2373DECL_INLINE_THROW(uint32_t)
2374iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2375 uintptr_t pfnCImpl, uint8_t idxArg0)
2376{
2377 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2378 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2379}
2380
2381
2382#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2383 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2384
2385/** Emits code for IEM_MC_CALL_CIMPL_2. */
2386DECL_INLINE_THROW(uint32_t)
2387iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2388 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2389{
2390 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2391 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2392 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2393}
2394
2395
2396#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2397 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2398 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2399
2400/** Emits code for IEM_MC_CALL_CIMPL_3. */
2401DECL_INLINE_THROW(uint32_t)
2402iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2403 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2404{
2405 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2406 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2407 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2408 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2409}
2410
2411
2412#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2413 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2414 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2415
2416/** Emits code for IEM_MC_CALL_CIMPL_4. */
2417DECL_INLINE_THROW(uint32_t)
2418iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2419 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2420{
2421 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2422 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2423 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2424 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2425 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2426}
2427
2428
2429#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2430 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2431 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2432
2433/** Emits code for IEM_MC_CALL_CIMPL_5. */
2434DECL_INLINE_THROW(uint32_t)
2435iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2436 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2437{
2438 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2439 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2440 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2441 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2442 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2443 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2444}
2445
2446
2447/** Recompiler debugging: Flush guest register shadow copies. */
2448#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2449
2450
2451
2452/*********************************************************************************************************************************
2453* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2454*********************************************************************************************************************************/
2455
2456/**
2457 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2458 */
2459DECL_INLINE_THROW(uint32_t)
2460iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2461 uintptr_t pfnAImpl, uint8_t cArgs)
2462{
2463 if (idxVarRc != UINT8_MAX)
2464 {
2465 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2466 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2467 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2468 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2469 }
2470
2471 /*
2472 * Do all the call setup and cleanup.
2473 *
2474 * Only pending guest register writes living in call volatile registers need
2475 * flushing here, as assembly helpers can't throw and don't access anything living
2476 * in CPUMCTX; they only access their parameters. Call volatile registers are always
2477 * flushed by iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
2478 */
2479 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
2480
2481 /*
2482 * Make the call and update the return code variable if we've got one.
2483 */
2484 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2485 if (idxVarRc != UINT8_MAX)
2486 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2487
2488 return off;
2489}
2490
2491
2492
2493#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2494 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2495
2496#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2497 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2498
2499/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2500DECL_INLINE_THROW(uint32_t)
2501iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2502{
2503 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2504}
2505
2506
2507#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2508 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2509
2510#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2511 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2512
2513/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2514DECL_INLINE_THROW(uint32_t)
2515iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2516{
2517 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2518 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2519}
2520
2521
2522#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2523 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2524
2525#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2526 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2527
2528/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2529DECL_INLINE_THROW(uint32_t)
2530iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2531 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2532{
2533 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2534 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2535 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2536}
2537
2538
2539#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2540 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2541
2542#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
2543 IEM_MC_LOCAL(a_rcType, a_rc); \
2544 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2545
2546/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2547DECL_INLINE_THROW(uint32_t)
2548iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2549 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2550{
2551 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2552 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2553 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2554 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2555}
2556
2557
2558#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2559 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2560
2561#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
2562 IEM_MC_LOCAL(a_rcType, a_rc); \
2563 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2564
2565/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2566DECL_INLINE_THROW(uint32_t)
2567iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2568 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2569{
2570 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2571 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2572 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2573 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2574 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2575}
2576
2577
2578
2579/*********************************************************************************************************************************
2580* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2581*********************************************************************************************************************************/
2582
2583#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2584 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2585
2586#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2587 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2588
2589#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2590 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2591
2592#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2593 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2594
2595
2596/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2597 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2598DECL_INLINE_THROW(uint32_t)
2599iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2600{
2601 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2602 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2603 Assert(iGRegEx < 20);
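    /* Assumption for readers: iGRegEx values 0..15 select the low byte of the
       corresponding GPR, while 16..19 appear to select the high byte (AH/CH/DH/BH)
       of the first four GPRs - hence the '& 15' above and the Gpr8Hi load below. */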
2604
2605 /* Same discussion as in iemNativeEmitFetchGregU16 */
2606 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2607 kIemNativeGstRegUse_ReadOnly);
2608
2609 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2610 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2611
2612 /* The value is zero-extended to the full 64-bit host register width. */
2613 if (iGRegEx < 16)
2614 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2615 else
2616 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2617
2618 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2619 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2620 return off;
2621}
2622
2623
2624#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2625 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2626
2627#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2628 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2629
2630#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2631 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2632
2633/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2634DECL_INLINE_THROW(uint32_t)
2635iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2636{
2637 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2638 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2639 Assert(iGRegEx < 20);
2640
2641 /* Same discussion as in iemNativeEmitFetchGregU16 */
2642 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2643 kIemNativeGstRegUse_ReadOnly);
2644
2645 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2646 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2647
2648 if (iGRegEx < 16)
2649 {
2650 switch (cbSignExtended)
2651 {
2652 case sizeof(uint16_t):
2653 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2654 break;
2655 case sizeof(uint32_t):
2656 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2657 break;
2658 case sizeof(uint64_t):
2659 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2660 break;
2661 default: AssertFailed(); break;
2662 }
2663 }
2664 else
2665 {
2666 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2667 switch (cbSignExtended)
2668 {
2669 case sizeof(uint16_t):
2670 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2671 break;
2672 case sizeof(uint32_t):
2673 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2674 break;
2675 case sizeof(uint64_t):
2676 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2677 break;
2678 default: AssertFailed(); break;
2679 }
2680 }
2681
2682 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2683 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2684 return off;
2685}
2686
2687
2688
2689#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2690 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2691
2692#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2693 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2694
2695#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2696 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2697
2698/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2699DECL_INLINE_THROW(uint32_t)
2700iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2701{
2702 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2703 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2704 Assert(iGReg < 16);
2705
2706 /*
2707 * We can either just load the low 16-bit of the GPR into a host register
2708 * for the variable, or we can do so via a shadow copy host register. The
2709 * latter will avoid having to reload it if it's being stored later, but
2710 * will waste a host register if it isn't touched again. Since we don't
2711 * know what's going to happen, we choose the latter for now.
2712 */
2713 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2714 kIemNativeGstRegUse_ReadOnly);
2715
2716 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2717 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2718 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2719 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2720
2721 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2722 return off;
2723}
2724
2725
2726#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2727 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2728
2729#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2730 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2731
2732/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2733DECL_INLINE_THROW(uint32_t)
2734iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2735{
2736 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2737 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2738 Assert(iGReg < 16);
2739
2740 /*
2741 * We can either just load the low 16-bit of the GPR into a host register
2742 * for the variable, or we can do so via a shadow copy host register. The
2743 * latter will avoid having to reload it if it's being stored later, but
2744 * will waste a host register if it isn't touched again. Since we don't
2745 * know what's going to happen, we choose the latter for now.
2746 */
2747 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2748 kIemNativeGstRegUse_ReadOnly);
2749
2750 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2751 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2752 if (cbSignExtended == sizeof(uint32_t))
2753 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2754 else
2755 {
2756 Assert(cbSignExtended == sizeof(uint64_t));
2757 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2758 }
2759 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2760
2761 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2762 return off;
2763}
2764
2765
2766#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2767 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2768
2769#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2770 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2771
2772/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2773DECL_INLINE_THROW(uint32_t)
2774iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2775{
2776 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2777 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2778 Assert(iGReg < 16);
2779
2780 /*
2781 * We can either just load the low 32 bits of the GPR into a host register
2782 * for the variable, or we can do so via a shadow copy host register. The
2783 * latter will avoid having to reload it if it's being stored later, but
2784 * will waste a host register if it isn't touched again. Since we don't
2785 * know what's going to happen, we choose the latter for now.
2786 */
2787 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2788 kIemNativeGstRegUse_ReadOnly);
2789
2790 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2791 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2792 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2793 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2794
2795 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2796 return off;
2797}
2798
2799
2800#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2801 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2802
2803/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2804DECL_INLINE_THROW(uint32_t)
2805iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2806{
2807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2808 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2809 Assert(iGReg < 16);
2810
2811 /*
2812 * We can either just load the low 32-bit of the GPR into a host register
2813 * for the variable, or we can do so via a shadow copy host register. The
2814 * latter will avoid having to reload it if it's being stored later, but
2815 * will waste a host register if it isn't touched again. Since we don't
2816 * know what's going to happen, we choose the latter for now.
2817 */
2818 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2819 kIemNativeGstRegUse_ReadOnly);
2820
2821 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2822 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2823 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2824 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2825
2826 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2827 return off;
2828}
2829
2830
2831#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2832 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2833
2834#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2835 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2836
2837/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2838 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2839DECL_INLINE_THROW(uint32_t)
2840iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2841{
2842 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2843 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2844 Assert(iGReg < 16);
2845
2846 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2847 kIemNativeGstRegUse_ReadOnly);
2848
2849 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2850 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2851 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2852 /** @todo name the register a shadow one already? */
2853 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2854
2855 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2856 return off;
2857}
2858
2859
2860#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2861#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
2862 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
2863
2864/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
2865DECL_INLINE_THROW(uint32_t)
2866iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
2867{
2868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2869 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
2870 Assert(iGRegLo < 16 && iGRegHi < 16);
2871
2872 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
2873 kIemNativeGstRegUse_ReadOnly);
2874 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
2875 kIemNativeGstRegUse_ReadOnly);
2876
2877 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2878 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
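    /* Pack the two 64-bit GPR values into qword lanes 0 and 1 of the 128-bit
       SIMD register backing the destination variable. */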
2879 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
2880 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
2881
2882 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
2883 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
2884 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
2885 return off;
2886}
2887#endif
2888
2889
2890/*********************************************************************************************************************************
2891* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2892*********************************************************************************************************************************/
2893
2894#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2895 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2896
2897/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2898DECL_INLINE_THROW(uint32_t)
2899iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2900{
2901 Assert(iGRegEx < 20);
2902 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2903 kIemNativeGstRegUse_ForUpdate);
2904#ifdef RT_ARCH_AMD64
2905 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2906
2907 /* To the lowest byte of the register: mov r8, imm8 */
2908 if (iGRegEx < 16)
2909 {
2910 if (idxGstTmpReg >= 8)
2911 pbCodeBuf[off++] = X86_OP_REX_B;
2912 else if (idxGstTmpReg >= 4)
2913 pbCodeBuf[off++] = X86_OP_REX;
2914 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2915 pbCodeBuf[off++] = u8Value;
2916 }
2917 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
2918 else if (idxGstTmpReg < 4)
2919 {
2920 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2921 pbCodeBuf[off++] = u8Value;
2922 }
2923 else
2924 {
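        /* Sketch of the sequence below (hypothetical host register r9 shadowing the guest GPR):
                ror r9, 8        ; bring bits 15:8 down to 7:0
                mov r9b, imm8    ; patch the byte
                rol r9, 8        ; rotate back, restoring the original layout
           Only bits 15:8 of the guest register end up modified. */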
2925 /* ror reg64, 8 */
2926 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2927 pbCodeBuf[off++] = 0xc1;
2928 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2929 pbCodeBuf[off++] = 8;
2930
2931 /* mov reg8, imm8 */
2932 if (idxGstTmpReg >= 8)
2933 pbCodeBuf[off++] = X86_OP_REX_B;
2934 else if (idxGstTmpReg >= 4)
2935 pbCodeBuf[off++] = X86_OP_REX;
2936 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2937 pbCodeBuf[off++] = u8Value;
2938
2939 /* rol reg64, 8 */
2940 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2941 pbCodeBuf[off++] = 0xc1;
2942 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2943 pbCodeBuf[off++] = 8;
2944 }
2945
2946#elif defined(RT_ARCH_ARM64)
2947 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2948 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2949 if (iGRegEx < 16)
2950 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2951 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2952 else
2953 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2954 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2955 iemNativeRegFreeTmp(pReNative, idxImmReg);
2956
2957#else
2958# error "Port me!"
2959#endif
2960
2961 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2962
2963#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2964 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2965#endif
2966
2967 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2968 return off;
2969}
2970
2971
2972#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2973 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2974
2975/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2976DECL_INLINE_THROW(uint32_t)
2977iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2978{
2979 Assert(iGRegEx < 20);
2980 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2981
2982 /*
2983 * If it's a constant value (unlikely), we treat this as an
2984 * IEM_MC_STORE_GREG_U8_CONST statement.
2985 */
2986 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2987 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2988 { /* likely */ }
2989 else
2990 {
2991 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2992 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2993 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2994 }
2995
2996 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2997 kIemNativeGstRegUse_ForUpdate);
2998 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2999
3000#ifdef RT_ARCH_AMD64
3001 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
3002 if (iGRegEx < 16)
3003 {
3004 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3005 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3006 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3007 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3008 pbCodeBuf[off++] = X86_OP_REX;
3009 pbCodeBuf[off++] = 0x8a;
3010 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3011 }
3012 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
3013 else if (idxGstTmpReg < 4 && idxVarReg < 4)
3014 {
3015 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
3016 pbCodeBuf[off++] = 0x8a;
3017 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
3018 }
3019 else
3020 {
3021 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
3022
3023 /* ror reg64, 8 */
3024 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3025 pbCodeBuf[off++] = 0xc1;
3026 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3027 pbCodeBuf[off++] = 8;
3028
3029 /* mov reg8, reg8(r/m) */
3030 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3031 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3032 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3033 pbCodeBuf[off++] = X86_OP_REX;
3034 pbCodeBuf[off++] = 0x8a;
3035 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3036
3037 /* rol reg64, 8 */
3038 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3039 pbCodeBuf[off++] = 0xc1;
3040 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3041 pbCodeBuf[off++] = 8;
3042 }
3043
3044#elif defined(RT_ARCH_ARM64)
3045 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
3046 or
3047 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
3048 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3049 if (iGRegEx < 16)
3050 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
3051 else
3052 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
3053
3054#else
3055# error "Port me!"
3056#endif
3057 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3058
3059 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3060
3061#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3062 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
3063#endif
3064 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3065 return off;
3066}
3067
3068
3069
3070#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
3071 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
3072
3073/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
3074DECL_INLINE_THROW(uint32_t)
3075iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
3076{
3077 Assert(iGReg < 16);
3078 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3079 kIemNativeGstRegUse_ForUpdate);
3080#ifdef RT_ARCH_AMD64
3081 /* mov reg16, imm16 */
3082 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
3083 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3084 if (idxGstTmpReg >= 8)
3085 pbCodeBuf[off++] = X86_OP_REX_B;
3086 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
3087 pbCodeBuf[off++] = RT_BYTE1(uValue);
3088 pbCodeBuf[off++] = RT_BYTE2(uValue);
3089
3090#elif defined(RT_ARCH_ARM64)
3091 /* movk xdst, #uValue, lsl #0 */
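    /* MOVK with LSL #0 only replaces bits 15:0 and leaves the rest of the
       destination register untouched, matching the 16-bit GREG store semantics. */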
3092 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3093 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3094
3095#else
3096# error "Port me!"
3097#endif
3098
3099 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3100
3101#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3102 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3103#endif
3104 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3105 return off;
3106}
3107
3108
3109#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3110 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3111
3112/** Emits code for IEM_MC_STORE_GREG_U16. */
3113DECL_INLINE_THROW(uint32_t)
3114iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3115{
3116 Assert(iGReg < 16);
3117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3118
3119 /*
3120 * If it's a constant value (unlikely), we treat this as an
3121 * IEM_MC_STORE_GREG_U16_CONST statement.
3122 */
3123 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3124 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3125 { /* likely */ }
3126 else
3127 {
3128 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3129 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3130 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3131 }
3132
3133 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3134 kIemNativeGstRegUse_ForUpdate);
3135
3136#ifdef RT_ARCH_AMD64
3137 /* mov reg16, reg16 or [mem16] */
3138 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3139 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3140 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3141 {
3142 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3143 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3144 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3145 pbCodeBuf[off++] = 0x8b;
3146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3147 }
3148 else
3149 {
3150 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3151 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3152 if (idxGstTmpReg >= 8)
3153 pbCodeBuf[off++] = X86_OP_REX_R;
3154 pbCodeBuf[off++] = 0x8b;
3155 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3156 }
3157
3158#elif defined(RT_ARCH_ARM64)
3159 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3160 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3161 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3162 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3163 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3164
3165#else
3166# error "Port me!"
3167#endif
3168
3169 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3170
3171#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3172 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3173#endif
3174 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3175 return off;
3176}
3177
3178
3179#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3180 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3181
3182/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3183DECL_INLINE_THROW(uint32_t)
3184iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3185{
3186 Assert(iGReg < 16);
3187 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3188 kIemNativeGstRegUse_ForFullWrite);
3189 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3190#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3191 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3192#endif
3193 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3194 return off;
3195}
3196
3197
3198#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3199 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3200
3201/** Emits code for IEM_MC_STORE_GREG_U32. */
3202DECL_INLINE_THROW(uint32_t)
3203iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3204{
3205 Assert(iGReg < 16);
3206 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3207
3208 /*
3209 * If it's a constant value (unlikely), we treat this as an
3210 * IEM_MC_STORE_GREG_U32_CONST statement.
3211 */
3212 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3213 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3214 { /* likely */ }
3215 else
3216 {
3217 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3218 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3219 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3220 }
3221
3222 /*
3223 * For the rest we allocate a guest register for the variable and write
3224 * it to the CPUMCTX structure.
3225 */
3226 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3227#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3228 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3229#else
3230 RT_NOREF(idxVarReg);
3231#endif
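    /* The strict check below verifies that the upper 32 bits of the value are
       already clear, since a 32-bit GREG store is expected to zero-extend into
       the full 64-bit guest register. */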
3232#ifdef VBOX_STRICT
3233 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3234#endif
3235 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3236 return off;
3237}
3238
3239
3240#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3241 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3242
3243/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3244DECL_INLINE_THROW(uint32_t)
3245iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3246{
3247 Assert(iGReg < 16);
3248 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3249 kIemNativeGstRegUse_ForFullWrite);
3250 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3251#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3252 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3253#endif
3254 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3255 return off;
3256}
3257
3258
3259#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3260 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3261
3262#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3263 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3264
3265/** Emits code for IEM_MC_STORE_GREG_U64. */
3266DECL_INLINE_THROW(uint32_t)
3267iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3268{
3269 Assert(iGReg < 16);
3270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3271
3272 /*
3273 * If it's a constant value (unlikely), we treat this as an
3274 * IEM_MC_STORE_GREG_U64_CONST statement.
3275 */
3276 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3277 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3278 { /* likely */ }
3279 else
3280 {
3281 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3282 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3283 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3284 }
3285
3286 /*
3287 * For the rest we allocate a guest register for the variable and write
3288 * it to the CPUMCTX structure.
3289 */
3290 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3291#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3292 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3293#else
3294 RT_NOREF(idxVarReg);
3295#endif
3296 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3297 return off;
3298}
3299
3300
3301#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3302 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3303
3304/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3305DECL_INLINE_THROW(uint32_t)
3306iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3307{
3308 Assert(iGReg < 16);
3309 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3310 kIemNativeGstRegUse_ForUpdate);
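    /* A 32-bit register-to-register move zero-extends on both host architectures,
       so this single load clears bits 63:32 of the guest register copy. */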
3311 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3312#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3313 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3314#endif
3315 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3316 return off;
3317}
3318
3319
3320#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3321#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
3322 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
3323
3324/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
3325DECL_INLINE_THROW(uint32_t)
3326iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
3327{
3328 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3329 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3330 Assert(iGRegLo < 16 && iGRegHi < 16);
3331
3332 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3333 kIemNativeGstRegUse_ForFullWrite);
3334 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3335 kIemNativeGstRegUse_ForFullWrite);
3336
3337 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3338 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
3339 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
3340 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
3341
3342 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3343 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3344 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3345 return off;
3346}
3347#endif
3348
3349
3350/*********************************************************************************************************************************
3351* General purpose register manipulation (add, sub). *
3352*********************************************************************************************************************************/
3353
3354#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3355 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3356
3357/** Emits code for IEM_MC_ADD_GREG_U16. */
3358DECL_INLINE_THROW(uint32_t)
3359iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3360{
3361 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3362 kIemNativeGstRegUse_ForUpdate);
3363
3364#ifdef RT_ARCH_AMD64
3365 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3366 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3367 if (idxGstTmpReg >= 8)
3368 pbCodeBuf[off++] = X86_OP_REX_B;
3369 if (uAddend == 1)
3370 {
3371 pbCodeBuf[off++] = 0xff; /* inc */
3372 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3373 }
3374 else
3375 {
3376 pbCodeBuf[off++] = 0x81;
3377 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3378 pbCodeBuf[off++] = uAddend;
3379 pbCodeBuf[off++] = 0;
3380 }
3381
3382#else
3383 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3384 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3385
3386 /* add tmp, gstgrp, uAddend */
3387 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3388
3389 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3390 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3391
3392 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3393#endif
3394
3395 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3396
3397#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3398 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3399#endif
3400
3401 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3402 return off;
3403}
3404
3405
3406#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3407 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3408
3409#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3410 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3411
3412/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3413DECL_INLINE_THROW(uint32_t)
3414iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3415{
3416 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3417 kIemNativeGstRegUse_ForUpdate);
3418
3419#ifdef RT_ARCH_AMD64
3420 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3421 if (f64Bit)
3422 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3423 else if (idxGstTmpReg >= 8)
3424 pbCodeBuf[off++] = X86_OP_REX_B;
3425 if (uAddend == 1)
3426 {
3427 pbCodeBuf[off++] = 0xff; /* inc */
3428 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3429 }
3430 else if (uAddend < 128)
3431 {
3432 pbCodeBuf[off++] = 0x83; /* add */
3433 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3434 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3435 }
3436 else
3437 {
3438 pbCodeBuf[off++] = 0x81; /* add */
3439 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3440 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3441 pbCodeBuf[off++] = 0;
3442 pbCodeBuf[off++] = 0;
3443 pbCodeBuf[off++] = 0;
3444 }
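    /* Encoding note: opcode 0x83 takes a sign-extended imm8, hence the uAddend < 128
       guard; larger addends use 0x81 with a full imm32, which is why three zero
       immediate bytes follow RT_BYTE1(uAddend) above. */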
3445
3446#else
3447 /* add gstgrp, gstgrp, uAddend */
3448 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3449 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3450
3451#endif
3452
3453 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3454
3455#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3456 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3457#endif
3458
3459 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3460 return off;
3461}
3462
3463
3464
3465#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3466 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3467
3468/** Emits code for IEM_MC_SUB_GREG_U16. */
3469DECL_INLINE_THROW(uint32_t)
3470iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3471{
3472 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3473 kIemNativeGstRegUse_ForUpdate);
3474
3475#ifdef RT_ARCH_AMD64
3476 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3477 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3478 if (idxGstTmpReg >= 8)
3479 pbCodeBuf[off++] = X86_OP_REX_B;
3480 if (uSubtrahend == 1)
3481 {
3482 pbCodeBuf[off++] = 0xff; /* dec */
3483 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3484 }
3485 else
3486 {
3487 pbCodeBuf[off++] = 0x81;
3488 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3489 pbCodeBuf[off++] = uSubtrahend;
3490 pbCodeBuf[off++] = 0;
3491 }
3492
3493#else
3494 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3495 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3496
3497 /* sub tmp, gstgrp, uSubtrahend */
3498 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3499
3500 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3501 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3502
3503 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3504#endif
3505
3506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3507
3508#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3509 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3510#endif
3511
3512 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3513 return off;
3514}
3515
3516
3517#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3518 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3519
3520#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3521 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3522
3523/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3524DECL_INLINE_THROW(uint32_t)
3525iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3526{
3527 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3528 kIemNativeGstRegUse_ForUpdate);
3529
3530#ifdef RT_ARCH_AMD64
3531 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3532 if (f64Bit)
3533 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3534 else if (idxGstTmpReg >= 8)
3535 pbCodeBuf[off++] = X86_OP_REX_B;
3536 if (uSubtrahend == 1)
3537 {
3538 pbCodeBuf[off++] = 0xff; /* dec */
3539 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3540 }
3541 else if (uSubtrahend < 128)
3542 {
3543 pbCodeBuf[off++] = 0x83; /* sub */
3544 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3545 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3546 }
3547 else
3548 {
3549 pbCodeBuf[off++] = 0x81; /* sub */
3550 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3551 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3552 pbCodeBuf[off++] = 0;
3553 pbCodeBuf[off++] = 0;
3554 pbCodeBuf[off++] = 0;
3555 }
3556
3557#else
3558 /* sub gstgrp, gstgrp, uSubtrahend */
3559 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3560 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3561
3562#endif
3563
3564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3565
3566#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3567 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3568#endif
3569
3570 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3571 return off;
3572}
3573
3574
3575#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3576 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3577
3578#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3579 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3580
3581#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3582 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3583
3584#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3585 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3586
3587/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3588DECL_INLINE_THROW(uint32_t)
3589iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3590{
3591#ifdef VBOX_STRICT
3592 switch (cbMask)
3593 {
3594 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3595 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3596 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3597 case sizeof(uint64_t): break;
3598 default: AssertFailedBreak();
3599 }
3600#endif
3601
3602 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3603 kIemNativeGstRegUse_ForUpdate);
3604
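    /* For the sub-32-bit cases the mask is widened with all-ones so only the
       addressed part of the guest register is affected; e.g. (sketch) an 8-bit
       mask 0x0f on a GPR becomes a 64-bit AND with 0xffffffffffffff0f. */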
3605 switch (cbMask)
3606 {
3607 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3608 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3609 break;
3610 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3611 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3612 break;
3613 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3614 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3615 break;
3616 case sizeof(uint64_t):
3617 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3618 break;
3619 default: AssertFailedBreak();
3620 }
3621
3622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3623
3624#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3625 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3626#endif
3627
3628 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3629 return off;
3630}
3631
3632
3633#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3634 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3635
3636#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3637 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3638
3639#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3640 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3641
3642#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3643 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3644
3645/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3646DECL_INLINE_THROW(uint32_t)
3647iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3648{
3649#ifdef VBOX_STRICT
3650 switch (cbMask)
3651 {
3652 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3653 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3654 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3655 case sizeof(uint64_t): break;
3656 default: AssertFailedBreak();
3657 }
3658#endif
3659
3660 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3661 kIemNativeGstRegUse_ForUpdate);
3662
3663 switch (cbMask)
3664 {
3665 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3666 case sizeof(uint16_t):
3667 case sizeof(uint64_t):
3668 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3669 break;
3670 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3671 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3672 break;
3673 default: AssertFailedBreak();
3674 }
3675
3676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3677
3678#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3679 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3680#endif
3681
3682 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3683 return off;
3684}
3685
3686
3687/*********************************************************************************************************************************
3688* Local/Argument variable manipulation (add, sub, and, or). *
3689*********************************************************************************************************************************/
3690
3691#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3692 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3693
3694#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3695 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3696
3697#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3698 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3699
3700#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3701 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3702
3703
3704#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3705 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3706
3707#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3708 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3709
3710#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3711 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3712
3713/** Emits code for AND'ing a local and a constant value. */
3714DECL_INLINE_THROW(uint32_t)
3715iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3716{
3717#ifdef VBOX_STRICT
3718 switch (cbMask)
3719 {
3720 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3721 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3722 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3723 case sizeof(uint64_t): break;
3724 default: AssertFailedBreak();
3725 }
3726#endif
3727
3728 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3729 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3730
3731 if (cbMask <= sizeof(uint32_t))
3732 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3733 else
3734 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3735
3736 iemNativeVarRegisterRelease(pReNative, idxVar);
3737 return off;
3738}
3739
3740
3741#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3742 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3743
3744#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3745 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3746
3747#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3748 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3749
3750#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3751 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3752
3753/** Emits code for OR'ing a local and a constant value. */
3754DECL_INLINE_THROW(uint32_t)
3755iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3756{
3757#ifdef VBOX_STRICT
3758 switch (cbMask)
3759 {
3760 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3761 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3762 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3763 case sizeof(uint64_t): break;
3764 default: AssertFailedBreak();
3765 }
3766#endif
3767
3768 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3769 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3770
3771 if (cbMask <= sizeof(uint32_t))
3772 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3773 else
3774 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3775
3776 iemNativeVarRegisterRelease(pReNative, idxVar);
3777 return off;
3778}
3779
3780
3781#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3782 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3783
3784#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3785 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3786
3787#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3788 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3789
3790/** Emits code for reversing the byte order in a local value. */
3791DECL_INLINE_THROW(uint32_t)
3792iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3793{
3794 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3795 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3796
3797 switch (cbLocal)
3798 {
3799 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3800 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3801 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3802 default: AssertFailedBreak();
3803 }
3804
3805 iemNativeVarRegisterRelease(pReNative, idxVar);
3806 return off;
3807}
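
/*
 * Illustration only: a plain-C restatement of what the emitted byte swap computes
 * for 16- and 32-bit locals (the 64-bit case is analogous), assuming <stdint.h>;
 * the helper name is made up.
 *
 *      static uint64_t ExampleBswapLocal(uint64_t uLocal, uint8_t cbLocal)
 *      {
 *          if (cbLocal == sizeof(uint16_t))
 *          {
 *              uint16_t const u16 = (uint16_t)uLocal;
 *              return (uint16_t)((u16 << 8) | (u16 >> 8));     // only the low 16 bits are swapped
 *          }
 *          uint32_t const u32 = (uint32_t)uLocal;              // cbLocal == sizeof(uint32_t) assumed here
 *          return  (u32 << 24)
 *               | ((u32 <<  8) & UINT32_C(0x00ff0000))
 *               | ((u32 >>  8) & UINT32_C(0x0000ff00))
 *               |  (u32 >> 24);
 *      }
 */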
3808
3809
3810#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3811 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3812
3813#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3814 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3815
3816#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3817 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3818
3819/** Emits code for shifting a local value left. */
3820DECL_INLINE_THROW(uint32_t)
3821iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3822{
3823#ifdef VBOX_STRICT
3824 switch (cbLocal)
3825 {
3826 case sizeof(uint8_t): Assert(cShift < 8); break;
3827 case sizeof(uint16_t): Assert(cShift < 16); break;
3828 case sizeof(uint32_t): Assert(cShift < 32); break;
3829 case sizeof(uint64_t): Assert(cShift < 64); break;
3830 default: AssertFailedBreak();
3831 }
3832#endif
3833
3834 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3835 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3836
3837 if (cbLocal <= sizeof(uint32_t))
3838 {
3839 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3840 if (cbLocal < sizeof(uint32_t))
3841 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3842 cbLocal == sizeof(uint16_t)
3843 ? UINT32_C(0xffff)
3844 : UINT32_C(0xff));
3845 }
3846 else
3847 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3848
3849 iemNativeVarRegisterRelease(pReNative, idxVar);
3850 return off;
3851}
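
/*
 * Illustration only: the value semantics of IEM_MC_SHL_LOCAL_Sxx as emitted above -
 * a 32-bit (or 64-bit) host shift followed by a mask that drops the bits shifted
 * past the width of a sub-32-bit local. Plain-C sketch, assuming <stdint.h>; the
 * helper name is made up.
 *
 *      static uint32_t ExampleShlLocal(uint32_t uLocal, uint8_t cbLocal, uint8_t cShift)
 *      {
 *          uint32_t uResult = uLocal << cShift;            // 32-bit host shift
 *          if (cbLocal == sizeof(uint16_t))
 *              uResult &= UINT32_C(0xffff);                // discard bits shifted past a 16-bit local
 *          else if (cbLocal == sizeof(uint8_t))
 *              uResult &= UINT32_C(0xff);                  // ditto for an 8-bit local
 *          return uResult;
 *      }
 */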
3852
3853
3854#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3855 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3856
3857#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3858 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3859
3860#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3861 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3862
3863/** Emits code for arithmetic right-shifting (SAR) a local value. */
3864DECL_INLINE_THROW(uint32_t)
3865iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3866{
3867#ifdef VBOX_STRICT
3868 switch (cbLocal)
3869 {
3870 case sizeof(int8_t): Assert(cShift < 8); break;
3871 case sizeof(int16_t): Assert(cShift < 16); break;
3872 case sizeof(int32_t): Assert(cShift < 32); break;
3873 case sizeof(int64_t): Assert(cShift < 64); break;
3874 default: AssertFailedBreak();
3875 }
3876#endif
3877
3878 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3879 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3880
3881 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3882 if (cbLocal == sizeof(uint8_t))
3883 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3884 else if (cbLocal == sizeof(uint16_t))
3885 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3886
3887 if (cbLocal <= sizeof(uint32_t))
3888 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3889 else
3890 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3891
3892 iemNativeVarRegisterRelease(pReNative, idxVar);
3893 return off;
3894}
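
/*
 * Illustration only: why the emitter above sign-extends 8/16-bit locals before the
 * arithmetic right shift. Plain-C sketch for the 16-bit case, assuming the usual
 * arithmetic behaviour of >> on negative signed values; the helper name is made up.
 *
 *      static uint32_t ExampleSarLocal16(uint32_t uLocal, uint8_t cShift)
 *      {
 *          int32_t const i32 = (int16_t)uLocal;    // sign-extend the 16-bit local first
 *          return (uint32_t)(i32 >> cShift);       // now the local's own sign bit is replicated
 *      }
 */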
3895
3896
3897#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3898 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3899
3900#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3901 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3902
3903#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3904 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3905
3906/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3907DECL_INLINE_THROW(uint32_t)
3908iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3909{
3910 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3911 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3912 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3913 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3914
3915 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3916 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3917
3918 /* Need to sign extend the value. */
3919 if (cbLocal <= sizeof(uint32_t))
3920 {
3921/** @todo ARM64: In case of boredom, the extended add instruction can do the
3922 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3923 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3924
3925 switch (cbLocal)
3926 {
3927 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3928 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3929 default: AssertFailed();
3930 }
3931
3932 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3933 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3934 }
3935 else
3936 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3937
3938 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3939 iemNativeVarRegisterRelease(pReNative, idxVar);
3940 return off;
3941}
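
/*
 * Illustration only: the value semantics of IEM_MC_ADD_LOCAL_Sxx_TO_EFF_ADDR as
 * emitted above - the local is sign-extended to 64 bits and added to the effective
 * address. Plain-C sketch, assuming <stdint.h>; the helper name is made up.
 *
 *      static uint64_t ExampleAddLocalToEffAddr(uint64_t uEffAddr, uint64_t uLocal, uint8_t cbLocal)
 *      {
 *          int64_t iAddend;
 *          switch (cbLocal)
 *          {
 *              case sizeof(int16_t): iAddend = (int16_t)uLocal; break;   // sign-extend 16-bit local
 *              case sizeof(int32_t): iAddend = (int32_t)uLocal; break;   // sign-extend 32-bit local
 *              default:              iAddend = (int64_t)uLocal; break;   // 64-bit local used as-is
 *          }
 *          return uEffAddr + (uint64_t)iAddend;
 *      }
 */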
3942
3943
3944
3945/*********************************************************************************************************************************
3946* EFLAGS *
3947*********************************************************************************************************************************/
3948
3949#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3950# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3951#else
3952# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3953 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3954
3955DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3956{
3957 if (fEflOutput)
3958 {
3959 PVMCPUCC const pVCpu = pReNative->pVCpu;
3960# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3961 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3962 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3963 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3964# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3965 if (fEflOutput & (a_fEfl)) \
3966 { \
3967 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3968 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3969 else \
3970 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3971 } else do { } while (0)
3972# else
3973 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3974 IEMLIVENESSBIT const LivenessClobbered =
3975 {
3976 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3977 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3978 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3979 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3980 };
3981 IEMLIVENESSBIT const LivenessDelayable =
3982 {
3983 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3984 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3985 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3986 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3987 };
3988# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3989 if (fEflOutput & (a_fEfl)) \
3990 { \
3991 if (LivenessClobbered.a_fLivenessMember) \
3992 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3993 else if (LivenessDelayable.a_fLivenessMember) \
3994 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3995 else \
3996 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3997 } else do { } while (0)
3998# endif
3999 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
4000 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
4001 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
4002 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
4003 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
4004 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
4005 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
4006# undef CHECK_FLAG_AND_UPDATE_STATS
4007 }
4008 RT_NOREF(fEflInput);
4009}
4010#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
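
/*
 * Illustration only: the extended-layout (IEMLIVENESS_EXTENDED_LAYOUT) classification
 * above, restated over plain 64-bit liveness bitmaps (one bit per tracked guest
 * register/flag); the helper and parameter names are made up.
 *
 *      static void ExampleClassifyEflLiveness(uint64_t fWrite, uint64_t fRead,
 *                                             uint64_t fPotXcptOrCall, uint64_t fOther)
 *      {
 *          // Written but never consumed in any way -> the EFLAGS calculation is skippable.
 *          uint64_t const fClobbered = fWrite & ~(fRead | fPotXcptOrCall | fOther);
 *          // Written and only needed if an exception/helper call materializes -> delayable.
 *          uint64_t const fDelayable = fWrite & fPotXcptOrCall & ~(fRead | fOther);
 *          (void)fClobbered; (void)fDelayable;
 *      }
 */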
4011
4012#undef IEM_MC_FETCH_EFLAGS /* should not be used */
4013#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4014 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
4015
4016/** Handles IEM_MC_FETCH_EFLAGS_EX. */
4017DECL_INLINE_THROW(uint32_t)
4018iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
4019 uint32_t fEflInput, uint32_t fEflOutput)
4020{
4021 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
4022 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4023 RT_NOREF(fEflInput, fEflOutput);
4024
4025#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4026# ifdef VBOX_STRICT
4027 if ( pReNative->idxCurCall != 0
4028 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
4029 {
4030 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
4031 uint32_t const fBoth = fEflInput | fEflOutput;
4032# define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
4033 AssertMsg( !(fBoth & (a_fEflConst)) \
4034 || (!(fEflInput & (a_fEflConst)) \
4035 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4036 : !(fEflOutput & (a_fEflConst)) \
4037 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4038 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
4039 ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
4040 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
4041 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
4042 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
4043 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
4044 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
4045 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
4046 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
4047# undef ASSERT_ONE_EFL
4048 }
4049# endif
4050#endif
4051
4052 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4053
4054 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
4055 * the existing shadow copy. */
4056 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
4057 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4058 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
4059 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4060 return off;
4061}
4062
4063
4064
4065/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
4066 * start using it with custom native code emission (inlining assembly
4067 * instruction helpers). */
4068#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
4069#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4070 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4071 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
4072
4073#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
4074#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4075 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4076 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
4077
4078/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
4079DECL_INLINE_THROW(uint32_t)
4080iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
4081 bool fUpdateSkipping)
4082{
4083 RT_NOREF(fEflOutput);
4084 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
4085 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4086
4087#ifdef VBOX_STRICT
4088 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
4089 uint32_t offFixup = off;
4090 off = iemNativeEmitJnzToFixed(pReNative, off, off);
4091 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
4092 iemNativeFixupFixedJump(pReNative, offFixup, off);
4093
4094 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
4095 offFixup = off;
4096 off = iemNativeEmitJzToFixed(pReNative, off, off);
4097 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
4098 iemNativeFixupFixedJump(pReNative, offFixup, off);
4099
4100 /** @todo validate that only bits in the fEflOutput mask changed. */
4101#endif
4102
4103#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4104 if (fUpdateSkipping)
4105 {
4106 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4107 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4108 else
4109 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4110 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4111 }
4112#else
4113 RT_NOREF_PV(fUpdateSkipping);
4114#endif
4115
4116 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4117 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
4118 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4119 return off;
4120}
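
/*
 * Illustration only: the shape check performed by the VBOX_STRICT code above,
 * restated as plain C. The reserved always-one / always-zero masks are passed in
 * rather than spelled out here; the helper name is made up.
 *
 *      static int ExampleEflagsShapeOk(uint32_t fEfl, uint32_t fRa1Mask, uint32_t fRazMask)
 *      {
 *          if (!(fEfl & fRa1Mask))     // a reserved always-one bit is clear -> brk 0x2001 above
 *              return 0;
 *          if (fEfl & fRazMask)        // a reserved always-zero bit is set  -> brk 0x2002 above
 *              return 0;
 *          return 1;
 *      }
 */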
4121
4122
4123typedef enum IEMNATIVEMITEFLOP
4124{
4125 kIemNativeEmitEflOp_Invalid = 0,
4126 kIemNativeEmitEflOp_Set,
4127 kIemNativeEmitEflOp_Clear,
4128 kIemNativeEmitEflOp_Flip
4129} IEMNATIVEMITEFLOP;
4130
4131#define IEM_MC_SET_EFL_BIT(a_fBit) \
4132 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
4133
4134#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
4135 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
4136
4137#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
4138 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
4139
4140/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
4141DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
4142{
4143 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4144 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
4145
4146 switch (enmOp)
4147 {
4148 case kIemNativeEmitEflOp_Set:
4149 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4150 break;
4151 case kIemNativeEmitEflOp_Clear:
4152 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
4153 break;
4154 case kIemNativeEmitEflOp_Flip:
4155 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4156 break;
4157 default:
4158 AssertFailed();
4159 break;
4160 }
4161
4162 /** @todo No delayed writeback for EFLAGS right now. */
4163 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4164
4165 /* Free but don't flush the EFLAGS register. */
4166 iemNativeRegFreeTmp(pReNative, idxEflReg);
4167
4168 return off;
4169}
4170
4171
4172/*********************************************************************************************************************************
4173* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4174*********************************************************************************************************************************/
4175
4176#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4177 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4178
4179#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4180 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4181
4182#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4183 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4184
4185
4186/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4187 * IEM_MC_FETCH_SREG_ZX_U64. */
4188DECL_INLINE_THROW(uint32_t)
4189iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4190{
4191 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4192 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4193 Assert(iSReg < X86_SREG_COUNT);
4194
4195 /*
4196 * For now, we will not create a shadow copy of a selector. The rationale
4197 * is that since we do not recompile the popping and loading of segment
4198 * registers, and since the IEM_MC_FETCH_SREG_U* MCs are only used for
4199 * pushing and moving to registers, there is only a small chance that the
4200 * shadow copy will be accessed again before the register is reloaded. One
4201 * scenario would be nested calls in 16-bit code, but I doubt it's worth
4202 * the extra register pressure atm.
4203 *
4204 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4205 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
4206 * store scenario covered at present (r160730).
4207 */
4208 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4209 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4210 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4211 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4212 return off;
4213}
4214
4215
4216
4217/*********************************************************************************************************************************
4218* Register references. *
4219*********************************************************************************************************************************/
4220
4221#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4222 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4223
4224#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4225 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4226
4227/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4228DECL_INLINE_THROW(uint32_t)
4229iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4230{
4231 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4232 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4233 Assert(iGRegEx < 20);
4234
4235 if (iGRegEx < 16)
4236 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4237 else
4238 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4239
4240 /* If we've delayed writing back the register value, flush it now. */
4241 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4242
4243 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4244 if (!fConst)
4245 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4246
4247 return off;
4248}
4249
4250#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4251 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4252
4253#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4254 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4255
4256#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4257 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4258
4259#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4260 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4261
4262#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4263 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4264
4265#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4266 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4267
4268#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4269 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4270
4271#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4272 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4273
4274#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4275 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4276
4277#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4278 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4279
4280/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4281DECL_INLINE_THROW(uint32_t)
4282iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4283{
4284 Assert(iGReg < 16);
4285 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4286 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4287
4288 /* If we've delayed writing back the register value, flush it now. */
4289 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4290
4291 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4292 if (!fConst)
4293 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4294
4295 return off;
4296}
4297
4298
4299#undef IEM_MC_REF_EFLAGS /* should not be used. */
4300#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4301 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4302 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4303
4304/** Handles IEM_MC_REF_EFLAGS. */
4305DECL_INLINE_THROW(uint32_t)
4306iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4307{
4308 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4309 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4310
4311#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4312 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4313
4314 /* Updating the skipping according to the outputs is a little early, but
4315 we don't have any other hooks for references atm. */
4316 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4317 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4318 else if (fEflOutput & X86_EFL_STATUS_BITS)
4319 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4320 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4321#else
4322 RT_NOREF(fEflInput, fEflOutput);
4323#endif
4324
4325 /* If we've delayed writing back the register value, flush it now. */
4326 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4327
4328 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4329 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4330
4331 return off;
4332}
4333
4334
4335/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4336 * different code from the threaded recompiler, it might be helpful. For now
4337 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
4338#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4339
4340
4341#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4342 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4343
4344#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4345 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4346
4347#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4348 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4349
4350#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4351/* Just being paranoid here. */
4352# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
4353AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4354AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4355AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4356AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4357# endif
4358AssertCompileMemberOffset(X86XMMREG, au64, 0);
4359AssertCompileMemberOffset(X86XMMREG, au32, 0);
4360AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4361AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4362
4363# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4364 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4365# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4366 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4367# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4368 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4369# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4370 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4371#endif
4372
4373/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4374DECL_INLINE_THROW(uint32_t)
4375iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4376{
4377 Assert(iXReg < 16);
4378 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4379 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4380
4381 /* If we've delayed writing back the register value, flush it now. */
4382 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4383
4384#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4385 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4386 if (!fConst)
4387 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4388#else
4389 RT_NOREF(fConst);
4390#endif
4391
4392 return off;
4393}
4394
4395
4396
4397/*********************************************************************************************************************************
4398* Effective Address Calculation *
4399*********************************************************************************************************************************/
4400#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4401 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4402
4403/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4404 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4405DECL_INLINE_THROW(uint32_t)
4406iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4407 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4408{
4409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4410
4411 /*
4412 * Handle the disp16 form with no registers first.
4413 *
4414 * Convert to an immediate value, as that'll delay the register allocation
4415 * and assignment till the memory access / call / whatever and we can use
4416 * a more appropriate register (or none at all).
4417 */
4418 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4419 {
4420 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4421 return off;
4422 }
4423
4424 /* Determine the displacement. */
4425 uint16_t u16EffAddr;
4426 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4427 {
4428 case 0: u16EffAddr = 0; break;
4429 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4430 case 2: u16EffAddr = u16Disp; break;
4431 default: AssertFailedStmt(u16EffAddr = 0);
4432 }
4433
4434 /* Determine the registers involved. */
4435 uint8_t idxGstRegBase;
4436 uint8_t idxGstRegIndex;
4437 switch (bRm & X86_MODRM_RM_MASK)
4438 {
4439 case 0:
4440 idxGstRegBase = X86_GREG_xBX;
4441 idxGstRegIndex = X86_GREG_xSI;
4442 break;
4443 case 1:
4444 idxGstRegBase = X86_GREG_xBX;
4445 idxGstRegIndex = X86_GREG_xDI;
4446 break;
4447 case 2:
4448 idxGstRegBase = X86_GREG_xBP;
4449 idxGstRegIndex = X86_GREG_xSI;
4450 break;
4451 case 3:
4452 idxGstRegBase = X86_GREG_xBP;
4453 idxGstRegIndex = X86_GREG_xDI;
4454 break;
4455 case 4:
4456 idxGstRegBase = X86_GREG_xSI;
4457 idxGstRegIndex = UINT8_MAX;
4458 break;
4459 case 5:
4460 idxGstRegBase = X86_GREG_xDI;
4461 idxGstRegIndex = UINT8_MAX;
4462 break;
4463 case 6:
4464 idxGstRegBase = X86_GREG_xBP;
4465 idxGstRegIndex = UINT8_MAX;
4466 break;
4467#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4468 default:
4469#endif
4470 case 7:
4471 idxGstRegBase = X86_GREG_xBX;
4472 idxGstRegIndex = UINT8_MAX;
4473 break;
4474 }
4475
4476 /*
4477 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4478 */
4479 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4480 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4481 kIemNativeGstRegUse_ReadOnly);
4482 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4483 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4484 kIemNativeGstRegUse_ReadOnly)
4485 : UINT8_MAX;
4486#ifdef RT_ARCH_AMD64
4487 if (idxRegIndex == UINT8_MAX)
4488 {
4489 if (u16EffAddr == 0)
4490 {
4491 /* movzx ret, base */
4492 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4493 }
4494 else
4495 {
4496 /* lea ret32, [base64 + disp32] */
4497 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4498 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4499 if (idxRegRet >= 8 || idxRegBase >= 8)
4500 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4501 pbCodeBuf[off++] = 0x8d;
4502 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4503 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4504 else
4505 {
4506 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4507 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4508 }
4509 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4510 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4511 pbCodeBuf[off++] = 0;
4512 pbCodeBuf[off++] = 0;
4513 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4514
4515 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4516 }
4517 }
4518 else
4519 {
4520 /* lea ret32, [index64 + base64 (+ disp32)] */
4521 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4522 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4523 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4524 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4525 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4526 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4527 pbCodeBuf[off++] = 0x8d;
4528 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4529 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4530 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4531 if (bMod == X86_MOD_MEM4)
4532 {
4533 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4534 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4535 pbCodeBuf[off++] = 0;
4536 pbCodeBuf[off++] = 0;
4537 }
4538 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4539 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4540 }
4541
4542#elif defined(RT_ARCH_ARM64)
4543 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4544 if (u16EffAddr == 0)
4545 {
4546 if (idxRegIndex == UINT8_MAX)
4547 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4548 else
4549 {
4550 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4551 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4552 }
4553 }
4554 else
4555 {
4556 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4557 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4558 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4559 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4560 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4561 else
4562 {
4563 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4564 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4565 }
4566 if (idxRegIndex != UINT8_MAX)
4567 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4568 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4569 }
4570
4571#else
4572# error "port me"
4573#endif
4574
4575 if (idxRegIndex != UINT8_MAX)
4576 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4577 iemNativeRegFreeTmp(pReNative, idxRegBase);
4578 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4579 return off;
4580}
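
/*
 * Illustration only: the 16-bit effective address calculation the emitter above
 * mirrors (see iemOpHlpCalcRmEffAddrThreadedAddr16), restated as plain C with the
 * guest register values passed in; the helper name is made up.
 *
 *      static uint16_t ExampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp,
 *                                           uint16_t uBx, uint16_t uBp, uint16_t uSi, uint16_t uDi)
 *      {
 *          if ((bRm & 0xc7) == 6)                          // mod=0, r/m=6: plain disp16, no registers
 *              return u16Disp;
 *
 *          uint16_t uEffAddr;
 *          switch ((bRm >> 6) & 3)                         // mod selects the displacement size
 *          {
 *              case 0:  uEffAddr = 0; break;
 *              case 1:  uEffAddr = (uint16_t)(int8_t)u16Disp; break;   // disp8, sign-extended
 *              default: uEffAddr = u16Disp; break;                     // disp16
 *          }
 *          switch (bRm & 7)                                // r/m selects base (+ index)
 *          {
 *              case 0: uEffAddr += uBx + uSi; break;
 *              case 1: uEffAddr += uBx + uDi; break;
 *              case 2: uEffAddr += uBp + uSi; break;
 *              case 3: uEffAddr += uBp + uDi; break;
 *              case 4: uEffAddr += uSi;       break;
 *              case 5: uEffAddr += uDi;       break;
 *              case 6: uEffAddr += uBp;       break;
 *              default: uEffAddr += uBx;      break;
 *          }
 *          return uEffAddr;                                // wraps at 16 bits, like the emitted code
 *      }
 */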
4581
4582
4583#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4584 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4585
4586/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4587 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4588DECL_INLINE_THROW(uint32_t)
4589iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4590 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4591{
4592 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4593
4594 /*
4595 * Handle the disp32 form with no registers first.
4596 *
4597 * Convert to an immediate value, as that'll delay the register allocation
4598 * and assignment till the memory access / call / whatever and we can use
4599 * a more appropriate register (or none at all).
4600 */
4601 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4602 {
4603 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4604 return off;
4605 }
4606
4607 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
4608 uint32_t u32EffAddr = 0;
4609 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4610 {
4611 case 0: break;
4612 case 1: u32EffAddr = (int8_t)u32Disp; break;
4613 case 2: u32EffAddr = u32Disp; break;
4614 default: AssertFailed();
4615 }
4616
4617 /* Get the register (or SIB) value. */
4618 uint8_t idxGstRegBase = UINT8_MAX;
4619 uint8_t idxGstRegIndex = UINT8_MAX;
4620 uint8_t cShiftIndex = 0;
4621 switch (bRm & X86_MODRM_RM_MASK)
4622 {
4623 case 0: idxGstRegBase = X86_GREG_xAX; break;
4624 case 1: idxGstRegBase = X86_GREG_xCX; break;
4625 case 2: idxGstRegBase = X86_GREG_xDX; break;
4626 case 3: idxGstRegBase = X86_GREG_xBX; break;
4627 case 4: /* SIB */
4628 {
4629 /* index w/ scaling. */
4630 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4631 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4632 {
4633 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4634 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4635 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4636 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4637 case 4: cShiftIndex = 0; /*no index*/ break;
4638 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4639 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4640 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4641 }
4642
4643 /* base */
4644 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4645 {
4646 case 0: idxGstRegBase = X86_GREG_xAX; break;
4647 case 1: idxGstRegBase = X86_GREG_xCX; break;
4648 case 2: idxGstRegBase = X86_GREG_xDX; break;
4649 case 3: idxGstRegBase = X86_GREG_xBX; break;
4650 case 4:
4651 idxGstRegBase = X86_GREG_xSP;
4652 u32EffAddr += uSibAndRspOffset >> 8;
4653 break;
4654 case 5:
4655 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4656 idxGstRegBase = X86_GREG_xBP;
4657 else
4658 {
4659 Assert(u32EffAddr == 0);
4660 u32EffAddr = u32Disp;
4661 }
4662 break;
4663 case 6: idxGstRegBase = X86_GREG_xSI; break;
4664 case 7: idxGstRegBase = X86_GREG_xDI; break;
4665 }
4666 break;
4667 }
4668 case 5: idxGstRegBase = X86_GREG_xBP; break;
4669 case 6: idxGstRegBase = X86_GREG_xSI; break;
4670 case 7: idxGstRegBase = X86_GREG_xDI; break;
4671 }
4672
4673 /*
4674 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4675 * the start of the function.
4676 */
4677 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4678 {
4679 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4680 return off;
4681 }
4682
4683 /*
4684 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4685 */
4686 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4687 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4688 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4689 kIemNativeGstRegUse_ReadOnly);
4690 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4691 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4692 kIemNativeGstRegUse_ReadOnly);
4693
4694 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4695 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4696 {
4697 idxRegBase = idxRegIndex;
4698 idxRegIndex = UINT8_MAX;
4699 }
4700
4701#ifdef RT_ARCH_AMD64
4702 if (idxRegIndex == UINT8_MAX)
4703 {
4704 if (u32EffAddr == 0)
4705 {
4706 /* mov ret, base */
4707 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4708 }
4709 else
4710 {
4711 /* lea ret32, [base64 + disp32] */
4712 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4713 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4714 if (idxRegRet >= 8 || idxRegBase >= 8)
4715 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4716 pbCodeBuf[off++] = 0x8d;
4717 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4718 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4719 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4720 else
4721 {
4722 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4723 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4724 }
4725 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4726 if (bMod == X86_MOD_MEM4)
4727 {
4728 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4729 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4730 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4731 }
4732 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4733 }
4734 }
4735 else
4736 {
4737 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4738 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4739 if (idxRegBase == UINT8_MAX)
4740 {
4741 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4742 if (idxRegRet >= 8 || idxRegIndex >= 8)
4743 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4744 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4745 pbCodeBuf[off++] = 0x8d;
4746 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4747 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4748 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4749 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4750 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4751 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4752 }
4753 else
4754 {
4755 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4756 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4757 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4758 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4759 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4760 pbCodeBuf[off++] = 0x8d;
4761 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4762 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4763 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4764 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4765 if (bMod != X86_MOD_MEM0)
4766 {
4767 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4768 if (bMod == X86_MOD_MEM4)
4769 {
4770 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4771 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4772 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4773 }
4774 }
4775 }
4776 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4777 }
4778
4779#elif defined(RT_ARCH_ARM64)
4780 if (u32EffAddr == 0)
4781 {
4782 if (idxRegIndex == UINT8_MAX)
4783 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4784 else if (idxRegBase == UINT8_MAX)
4785 {
4786 if (cShiftIndex == 0)
4787 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4788 else
4789 {
4790 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4791 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4792 }
4793 }
4794 else
4795 {
4796 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4797 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4798 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4799 }
4800 }
4801 else
4802 {
4803 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4804 {
4805 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4806 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4807 }
4808 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4809 {
4810 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4811 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4812 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4813 }
4814 else
4815 {
4816 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4817 if (idxRegBase != UINT8_MAX)
4818 {
4819 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4820 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4821 }
4822 }
4823 if (idxRegIndex != UINT8_MAX)
4824 {
4825 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4826 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4827 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4828 }
4829 }
4830
4831#else
4832# error "port me"
4833#endif
4834
4835 if (idxRegIndex != UINT8_MAX)
4836 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4837 if (idxRegBase != UINT8_MAX)
4838 iemNativeRegFreeTmp(pReNative, idxRegBase);
4839 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4840 return off;
4841}
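
/*
 * Illustration only: the 32-bit address math the code above reduces to once the
 * base/index registers and the displacement have been picked apart, as plain C;
 * the helper name and the fHasBase/fHasIndex flags are made up.
 *
 *      static uint32_t ExampleCalcSibEffAddr32(uint32_t uBase, uint32_t uIndex, uint8_t cShift,
 *                                              uint32_t u32Disp, int fHasBase, int fHasIndex)
 *      {
 *          uint32_t uEffAddr = u32Disp;
 *          if (fHasBase)
 *              uEffAddr += uBase;
 *          if (fHasIndex)
 *              uEffAddr += uIndex << cShift;   // SIB scale is a shift count (0..3)
 *          return uEffAddr;                    // truncated to 32 bits, as in the emitted lea/add
 *      }
 */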
4842
4843
4844#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4845 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4846 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4847
4848#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4849 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4850 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4851
4852#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4853 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4854 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4855
4856/**
4857 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4858 *
4859 * @returns New off.
4860 * @param pReNative The native recompiler state.
4861 * @param off The current native code buffer offset.
4862 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4863 * bit 4 to REX.X. The two bits are part of the
4864 * REG sub-field, which isn't needed in this
4865 * function.
4866 * @param uSibAndRspOffset Two parts:
4867 * - The first 8 bits make up the SIB byte.
4868 * - The next 8 bits are the fixed RSP/ESP offset
4869 * in case of a pop [xSP].
4870 * @param u32Disp The displacement byte/word/dword, if any.
4871 * @param cbInstr The size of the fully decoded instruction. Used
4872 * for RIP relative addressing.
4873 * @param idxVarRet The result variable number.
4874 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4875 * when calculating the address.
4876 *
4877 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4878 */
4879DECL_INLINE_THROW(uint32_t)
4880iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4881 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4882{
4883 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4884
4885 /*
4886 * Special case the rip + disp32 form first.
4887 */
4888 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4889 {
4890#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4891 /* Need to take the current PC offset into account for the displacement; no need to flush here
4892 * as the PC is only read and no branching or helper calls are involved. */
4893 u32Disp += pReNative->Core.offPc;
4894#endif
4895
4896 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4897 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4898 kIemNativeGstRegUse_ReadOnly);
4899#ifdef RT_ARCH_AMD64
4900 if (f64Bit)
4901 {
4902 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4903 if ((int32_t)offFinalDisp == offFinalDisp)
4904 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4905 else
4906 {
4907 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4908 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4909 }
4910 }
4911 else
4912 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4913
4914#elif defined(RT_ARCH_ARM64)
4915 if (f64Bit)
4916 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4917 (int64_t)(int32_t)u32Disp + cbInstr);
4918 else
4919 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4920 (int32_t)u32Disp + cbInstr);
4921
4922#else
4923# error "Port me!"
4924#endif
4925 iemNativeRegFreeTmp(pReNative, idxRegPc);
4926 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4927 return off;
4928 }
4929
4930 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
4931 int64_t i64EffAddr = 0;
4932 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4933 {
4934 case 0: break;
4935 case 1: i64EffAddr = (int8_t)u32Disp; break;
4936 case 2: i64EffAddr = (int32_t)u32Disp; break;
4937 default: AssertFailed();
4938 }
4939
4940 /* Get the register (or SIB) value. */
4941 uint8_t idxGstRegBase = UINT8_MAX;
4942 uint8_t idxGstRegIndex = UINT8_MAX;
4943 uint8_t cShiftIndex = 0;
4944 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4945 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4946 else /* SIB: */
4947 {
4948 /* index w/ scaling. */
4949 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4950 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4951 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4952 if (idxGstRegIndex == 4)
4953 {
4954 /* no index */
4955 cShiftIndex = 0;
4956 idxGstRegIndex = UINT8_MAX;
4957 }
4958
4959 /* base */
4960 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4961 if (idxGstRegBase == 4)
4962 {
4963 /* pop [rsp] hack */
4964 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4965 }
4966 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4967 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4968 {
4969 /* mod=0 and base=5 -> disp32, no base reg. */
4970 Assert(i64EffAddr == 0);
4971 i64EffAddr = (int32_t)u32Disp;
4972 idxGstRegBase = UINT8_MAX;
4973 }
4974 }
4975
4976 /*
4977 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4978 * the start of the function.
4979 */
4980 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4981 {
4982 if (f64Bit)
4983 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4984 else
4985 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4986 return off;
4987 }
4988
4989 /*
4990 * Now emit code that calculates:
4991 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4992 * or if !f64Bit:
4993 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4994 */
4995 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4996 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4997 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4998 kIemNativeGstRegUse_ReadOnly);
4999 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5000 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5001 kIemNativeGstRegUse_ReadOnly);
5002
5003 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5004 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5005 {
5006 idxRegBase = idxRegIndex;
5007 idxRegIndex = UINT8_MAX;
5008 }
5009
5010#ifdef RT_ARCH_AMD64
5011 uint8_t bFinalAdj;
5012 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
5013 bFinalAdj = 0; /* likely */
5014 else
5015 {
5016 /* pop [rsp] with a problematic disp32 value. Split out the
5017 RSP offset and add it separately afterwards (bFinalAdj). */
5018 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
5019 Assert(idxGstRegBase == X86_GREG_xSP);
5020 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
5021 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
5022 Assert(bFinalAdj != 0);
5023 i64EffAddr -= bFinalAdj;
5024 Assert((int32_t)i64EffAddr == i64EffAddr);
5025 }
5026 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
5027//pReNative->pInstrBuf[off++] = 0xcc;
5028
5029 if (idxRegIndex == UINT8_MAX)
5030 {
5031 if (u32EffAddr == 0)
5032 {
5033 /* mov ret, base */
5034 if (f64Bit)
5035 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
5036 else
5037 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5038 }
5039 else
5040 {
5041 /* lea ret, [base + disp32] */
5042 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5043 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5044 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
5045 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5046 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5047 | (f64Bit ? X86_OP_REX_W : 0);
5048 pbCodeBuf[off++] = 0x8d;
5049 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5050 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5051 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5052 else
5053 {
5054 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5055 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5056 }
5057 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5058 if (bMod == X86_MOD_MEM4)
5059 {
5060 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5061 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5062 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5063 }
5064 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5065 }
5066 }
5067 else
5068 {
5069 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5070 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5071 if (idxRegBase == UINT8_MAX)
5072 {
5073 /* lea ret, [(index64 << cShiftIndex) + disp32] */
5074 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
5075 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5076 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5077 | (f64Bit ? X86_OP_REX_W : 0);
5078 pbCodeBuf[off++] = 0x8d;
5079 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5080 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5081 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5082 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5083 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5084 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5085 }
5086 else
5087 {
5088 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5089 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5090 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5091 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5092 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5093 | (f64Bit ? X86_OP_REX_W : 0);
5094 pbCodeBuf[off++] = 0x8d;
5095 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5096 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5097 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5098 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5099 if (bMod != X86_MOD_MEM0)
5100 {
5101 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5102 if (bMod == X86_MOD_MEM4)
5103 {
5104 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5105 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5106 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5107 }
5108 }
5109 }
5110 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5111 }
5112
5113 if (!bFinalAdj)
5114 { /* likely */ }
5115 else
5116 {
5117 Assert(f64Bit);
5118 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
5119 }
5120
5121#elif defined(RT_ARCH_ARM64)
5122 if (i64EffAddr == 0)
5123 {
5124 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5125 if (idxRegIndex == UINT8_MAX)
5126 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
5127 else if (idxRegBase != UINT8_MAX)
5128 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5129 f64Bit, false /*fSetFlags*/, cShiftIndex);
5130 else
5131 {
5132 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
5133 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
5134 }
5135 }
5136 else
5137 {
5138 if (f64Bit)
5139 { /* likely */ }
5140 else
5141 i64EffAddr = (int32_t)i64EffAddr;
5142
5143 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
5144 {
5145 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5146 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
5147 }
5148 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
5149 {
5150 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5151 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5152 }
5153 else
5154 {
5155 if (f64Bit)
5156 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5157 else
5158 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5159 if (idxRegBase != UINT8_MAX)
5160 {
5161 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5162 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5163 }
5164 }
5165 if (idxRegIndex != UINT8_MAX)
5166 {
5167 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5168 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5169 f64Bit, false /*fSetFlags*/, cShiftIndex);
5170 }
5171 }
5172
5173#else
5174# error "port me"
5175#endif
5176
5177 if (idxRegIndex != UINT8_MAX)
5178 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5179 if (idxRegBase != UINT8_MAX)
5180 iemNativeRegFreeTmp(pReNative, idxRegBase);
5181 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5182 return off;
5183}
5184
5185
5186/*********************************************************************************************************************************
5187* Memory fetches and stores common *
5188*********************************************************************************************************************************/
5189
5190typedef enum IEMNATIVEMITMEMOP
5191{
5192 kIemNativeEmitMemOp_Store = 0,
5193 kIemNativeEmitMemOp_Fetch,
5194 kIemNativeEmitMemOp_Fetch_Zx_U16,
5195 kIemNativeEmitMemOp_Fetch_Zx_U32,
5196 kIemNativeEmitMemOp_Fetch_Zx_U64,
5197 kIemNativeEmitMemOp_Fetch_Sx_U16,
5198 kIemNativeEmitMemOp_Fetch_Sx_U32,
5199 kIemNativeEmitMemOp_Fetch_Sx_U64
5200} IEMNATIVEMITMEMOP;
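/* Note: kIemNativeEmitMemOp_Store writes the value variable to guest memory and kIemNativeEmitMemOp_Fetch
   loads it at its natural width, while the _Zx_/_Sx_ variants load a narrower value and zero-/sign-extend
   it to the named width.  The _Zx_ variants reuse the plain fetch helpers on the TlbMiss path (the narrow
   loads already zero-extend), whereas the _Sx_ variants use dedicated sign-extending helpers, as the
   VBOX_STRICT checks in the common worker below spell out. */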
5201
5202/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5203 * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5204 * (with iSegReg = UINT8_MAX). */
5205DECL_INLINE_THROW(uint32_t)
5206iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5207 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5208 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5209{
5210 /*
5211 * Assert sanity.
5212 */
5213 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5214 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5215 Assert( enmOp != kIemNativeEmitMemOp_Store
5216 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5217 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5218 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5219 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5220 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5221 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5222 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5223 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5224#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5225 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5226 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5227#else
5228 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5229#endif
5230 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
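    /* The VBOX_STRICT table that follows merely validates that the helper passed in by the IEM_MC_*
       wrapper matches the access size, the operation and the addressing form (flat vs. segmented);
       it runs at recompile time and emits no code. */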
5231#ifdef VBOX_STRICT
5232 if (iSegReg == UINT8_MAX)
5233 {
5234 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5235 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5236 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5237 switch (cbMem)
5238 {
5239 case 1:
5240 Assert( pfnFunction
5241 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5242 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5243 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5244 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5245 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5246 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5247 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5248 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5249 : UINT64_C(0xc000b000a0009000) ));
5250 break;
5251 case 2:
5252 Assert( pfnFunction
5253 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5254 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5255 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5256 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5257 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5258 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5259 : UINT64_C(0xc000b000a0009000) ));
5260 break;
5261 case 4:
5262 Assert( pfnFunction
5263 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5264 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5265 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5266 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5267 : UINT64_C(0xc000b000a0009000) ));
5268 break;
5269 case 8:
5270 Assert( pfnFunction
5271 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5272 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5273 : UINT64_C(0xc000b000a0009000) ));
5274 break;
5275#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5276 case sizeof(RTUINT128U):
5277 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5278 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5279 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5280 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5281 || ( enmOp == kIemNativeEmitMemOp_Store
5282 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5283 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5284 break;
5285 case sizeof(RTUINT256U):
5286 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5287 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5288 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5289 || ( enmOp == kIemNativeEmitMemOp_Store
5290 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5291 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5292 break;
5293#endif
5294 }
5295 }
5296 else
5297 {
5298 Assert(iSegReg < 6);
5299 switch (cbMem)
5300 {
5301 case 1:
5302 Assert( pfnFunction
5303 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5304 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5305 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5306 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5307 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5308 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5309 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5310 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5311 : UINT64_C(0xc000b000a0009000) ));
5312 break;
5313 case 2:
5314 Assert( pfnFunction
5315 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5316 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5317 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5318 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5319 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5320 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5321 : UINT64_C(0xc000b000a0009000) ));
5322 break;
5323 case 4:
5324 Assert( pfnFunction
5325 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5326 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5327 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5328 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5329 : UINT64_C(0xc000b000a0009000) ));
5330 break;
5331 case 8:
5332 Assert( pfnFunction
5333 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5334 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5335 : UINT64_C(0xc000b000a0009000) ));
5336 break;
5337#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5338 case sizeof(RTUINT128U):
5339 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5340 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5341 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5342 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5343 || ( enmOp == kIemNativeEmitMemOp_Store
5344 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5345 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5346 break;
5347 case sizeof(RTUINT256U):
5348 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5349 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5350 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5351 || ( enmOp == kIemNativeEmitMemOp_Store
5352 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5353 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5354 break;
5355#endif
5356 }
5357 }
5358#endif
5359
5360#ifdef VBOX_STRICT
5361 /*
5362 * Check that the fExec flags we've got make sense.
5363 */
5364 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5365#endif
5366
5367 /*
5368 * To keep things simple we have to commit any pending writes first as we
5369 * may end up making calls.
5370 */
5371 /** @todo we could postpone this till we make the call and reload the
5372 * registers after returning from the call. Not sure if that's sensible or
5373 * not, though. */
5374#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5375 off = iemNativeRegFlushPendingWrites(pReNative, off);
5376#else
5377 /* The program counter is treated differently for now. */
5378 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5379#endif
5380
5381#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5382 /*
5383 * Move/spill/flush stuff out of call-volatile registers.
5384 * This is the easy way out. We could contain this to the tlb-miss branch
5385 * by saving and restoring active stuff here.
5386 */
5387 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5388#endif
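    /* Note: with IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP everything living in
       call-volatile registers has just been evicted, so the TlbMiss path below can call the helper
       without further ceremony.  Without it, variables in volatile registers are instead saved before
       and restored after the helper call on the TlbMiss path only (see the
       iemNativeVarSaveVolatileRegsPreHlpCall / iemNativeVarRestoreVolatileRegsPostHlpCall pair below). */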
5389
5390 /*
5391 * Define labels and allocate the result register (trying for the return
5392 * register if we can).
5393 */
5394 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5395#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5396 uint8_t idxRegValueFetch = UINT8_MAX;
5397
5398 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5399 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5400 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5401 else
5402 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5403 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5404 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5405 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5406#else
5407 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5408 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5409 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5410 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5411#endif
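    /* For fetches we try to hand the value variable the call return register up front: the TlbMiss
       helper returns the fetched value in IEMNATIVE_CALL_RET_GREG anyway, so grabbing it here saves a
       register-to-register move after the call (see the fetch result handling further down). */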
5412 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
5413
5414#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5415 uint8_t idxRegValueStore = UINT8_MAX;
5416
5417 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5418 idxRegValueStore = !TlbState.fSkip
5419 && enmOp == kIemNativeEmitMemOp_Store
5420 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5421 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5422 : UINT8_MAX;
5423 else
5424 idxRegValueStore = !TlbState.fSkip
5425 && enmOp == kIemNativeEmitMemOp_Store
5426 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5427 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5428 : UINT8_MAX;
5429
5430#else
5431 uint8_t const idxRegValueStore = !TlbState.fSkip
5432 && enmOp == kIemNativeEmitMemOp_Store
5433 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5434 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5435 : UINT8_MAX;
5436#endif
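    /* Stores of immediate-kind value variables deliberately skip the register acquisition above: the
       TlbLookup path emits them via the iemNativeEmitStoreImm*ByGprEx helpers, and the TlbMiss path
       loads the immediate directly into the call argument register. */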
5437 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5438 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5439 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5440 : UINT32_MAX;
5441
5442 /*
5443 * Jump to the TLB lookup code.
5444 */
5445 if (!TlbState.fSkip)
5446 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
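    /*
     * The emitted code is laid out as follows when the TLB lookup isn't skipped:
     *      jmp   TlbLookup
     *   TlbMiss:
     *      <save state, call pfnFunction, restore state>
     *      jmp   TlbDone
     *   TlbLookup:
     *      <inline TLB probe, branching back to TlbMiss on a miss>
     *      <inline load/store using the host address in idxRegMemResult>
     *   TlbDone:
     */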
5447
5448 /*
5449 * TlbMiss:
5450 *
5451 * Call helper to do the fetching.
5452 * We flush all guest register shadow copies here.
5453 */
5454 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5455
5456#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5457 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5458#else
5459 RT_NOREF(idxInstr);
5460#endif
5461
5462#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5463 if (pReNative->Core.offPc)
5464 {
5465 /*
5466 * Update the program counter but restore it at the end of the TlbMiss branch.
5467         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
5468         * which are hopefully much more frequent, reducing the number of memory accesses.
5469 */
5470 /* Allocate a temporary PC register. */
5471 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5472
5473 /* Perform the addition and store the result. */
5474 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5475 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5476
5477 /* Free and flush the PC register. */
5478 iemNativeRegFreeTmp(pReNative, idxPcReg);
5479 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5480 }
5481#endif
5482
5483#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5484 /* Save variables in volatile registers. */
5485 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5486 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5487 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5488 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5489#endif
5490
5491 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5492 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5493#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5494 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5495 {
5496 /*
5497 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5498 *
5499         * @note A host register was assigned to the variable for the TlbLookup case above. It must not be
5500         *       freed here, or the value loaded into that register will not reach the variable further
5501         *       down the road, because the variable would no longer know it has a register assigned.
5502 *
5503 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5504 * as it will be overwritten anyway.
5505 */
5506 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5507 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5508 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5509 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5510 }
5511 else
5512#endif
5513 if (enmOp == kIemNativeEmitMemOp_Store)
5514 {
5515 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5516 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5517#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5518 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5519#else
5520 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
5521 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5522#endif
5523 }
5524
5525 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5526 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5527#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5528 fVolGregMask);
5529#else
5530 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
5531#endif
5532
5533 if (iSegReg != UINT8_MAX)
5534 {
5535 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5536 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5537 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5538 }
5539
5540 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5541 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5542
5543 /* Done setting up parameters, make the call. */
5544 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5545
5546 /*
5547 * Put the result in the right register if this is a fetch.
5548 */
5549 if (enmOp != kIemNativeEmitMemOp_Store)
5550 {
5551#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5552 if ( cbMem == sizeof(RTUINT128U)
5553 || cbMem == sizeof(RTUINT256U))
5554 {
5555 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5556
5557 /* Sync the value on the stack with the host register assigned to the variable. */
5558 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5559 }
5560 else
5561#endif
5562 {
5563 Assert(idxRegValueFetch == pVarValue->idxReg);
5564 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5565 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5566 }
5567 }
5568
5569#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5570 /* Restore variables and guest shadow registers to volatile registers. */
5571 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5572 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5573#endif
5574
5575#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5576 if (pReNative->Core.offPc)
5577 {
5578 /*
5579 * Time to restore the program counter to its original value.
5580 */
5581 /* Allocate a temporary PC register. */
5582 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5583
5584 /* Restore the original value. */
5585 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5586 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5587
5588 /* Free and flush the PC register. */
5589 iemNativeRegFreeTmp(pReNative, idxPcReg);
5590 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5591 }
5592#endif
5593
5594#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5595 if (!TlbState.fSkip)
5596 {
5597 /* end of TlbMiss - Jump to the done label. */
5598 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5599 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5600
5601 /*
5602 * TlbLookup:
5603 */
5604 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5605 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5606 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
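        /* On a hit the lookup code falls through to here with the translated host address in
           idxRegMemResult, which the switch below uses directly for the inline access; on a miss it
           branches back to the TlbMiss label above. */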
5607
5608 /*
5609 * Emit code to do the actual storing / fetching.
5610 */
5611 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5612# ifdef VBOX_WITH_STATISTICS
5613 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5614 enmOp == kIemNativeEmitMemOp_Store
5615                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5616                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5617# endif
5618 switch (enmOp)
5619 {
5620 case kIemNativeEmitMemOp_Store:
5621 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5622 {
5623 switch (cbMem)
5624 {
5625 case 1:
5626 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5627 break;
5628 case 2:
5629 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5630 break;
5631 case 4:
5632 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5633 break;
5634 case 8:
5635 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5636 break;
5637#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5638 case sizeof(RTUINT128U):
5639 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5640 break;
5641 case sizeof(RTUINT256U):
5642 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5643 break;
5644#endif
5645 default:
5646 AssertFailed();
5647 }
5648 }
5649 else
5650 {
5651 switch (cbMem)
5652 {
5653 case 1:
5654 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5655 idxRegMemResult, TlbState.idxReg1);
5656 break;
5657 case 2:
5658 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5659 idxRegMemResult, TlbState.idxReg1);
5660 break;
5661 case 4:
5662 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5663 idxRegMemResult, TlbState.idxReg1);
5664 break;
5665 case 8:
5666 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5667 idxRegMemResult, TlbState.idxReg1);
5668 break;
5669 default:
5670 AssertFailed();
5671 }
5672 }
5673 break;
5674
5675 case kIemNativeEmitMemOp_Fetch:
5676 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5677 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5678 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5679 switch (cbMem)
5680 {
5681 case 1:
5682 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5683 break;
5684 case 2:
5685 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5686 break;
5687 case 4:
5688 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5689 break;
5690 case 8:
5691 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5692 break;
5693#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5694 case sizeof(RTUINT128U):
5695 /*
5696 * No need to sync back the register with the stack, this is done by the generic variable handling
5697 * code if there is a register assigned to a variable and the stack must be accessed.
5698 */
5699 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5700 break;
5701 case sizeof(RTUINT256U):
5702 /*
5703 * No need to sync back the register with the stack, this is done by the generic variable handling
5704 * code if there is a register assigned to a variable and the stack must be accessed.
5705 */
5706 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5707 break;
5708#endif
5709 default:
5710 AssertFailed();
5711 }
5712 break;
5713
5714 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5715 Assert(cbMem == 1);
5716 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5717 break;
5718
5719 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5720 Assert(cbMem == 1 || cbMem == 2);
5721 if (cbMem == 1)
5722 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5723 else
5724 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5725 break;
5726
5727 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5728 switch (cbMem)
5729 {
5730 case 1:
5731 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5732 break;
5733 case 2:
5734 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5735 break;
5736 case 4:
5737 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5738 break;
5739 default:
5740 AssertFailed();
5741 }
5742 break;
5743
5744 default:
5745 AssertFailed();
5746 }
5747
5748 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5749
5750 /*
5751 * TlbDone:
5752 */
5753 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5754
5755 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5756
5757# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5758 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5759 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5760# endif
5761 }
5762#else
5763 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5764#endif
5765
5766 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5767 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5768 return off;
5769}
5770
5771
5772
5773/*********************************************************************************************************************************
5774* Memory fetches (IEM_MEM_FETCH_XXX). *
5775*********************************************************************************************************************************/
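/* The IEM_MC_FETCH_MEM_* and IEM_MC_STORE_MEM_* wrappers below all funnel into the common worker above.
   The argument pattern is: value variable, segment register (UINT8_MAX for the FLAT variants), address
   variable, access size, alignment mask (size - 1, or 0 for byte accesses), the operation, the helper to
   call on the TlbMiss path, and the instruction index; the _DISP variants pass an extra displacement. */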
5776
5777/* 8-bit segmented: */
5778#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5779 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5780 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5781 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5782
5783#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5784 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5785 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5786 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5787
5788#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5789 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5790 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5791 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5792
5793#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5794 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5795 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5796 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5797
5798#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5799 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5800 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5801 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5802
5803#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5804 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5805 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5806 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5807
5808#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5809 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5810 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5811 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5812
5813/* 16-bit segmented: */
5814#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5815 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5816 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5817 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5818
5819#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5820 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5821 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5822 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5823
5824#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5825 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5826 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5827 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5828
5829#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5830 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5831 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5832 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5833
5834#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5835 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5836 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5837 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5838
5839#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5840 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5841 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5842 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5843
5844
5845/* 32-bit segmented: */
5846#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5847 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5848 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5849 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5850
5851#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5852 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5853 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5854 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5855
5856#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5857 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5858 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5859 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5860
5861#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5862 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5863 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5864 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5865
5866#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
5867 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
5868 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5869                                               (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5870
5871#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
5872 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
5873 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5874 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5875
5876#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
5877 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
5878 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5879 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5880
5881AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5882#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5883 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5884 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5885 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5886
5887
5888/* 64-bit segmented: */
5889#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5890 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5891 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5892 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5893
5894AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5895#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5896 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5897 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5898 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5899
5900
5901/* 8-bit flat: */
5902#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5903 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5904 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5905 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5906
5907#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5908 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5909 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5910 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5911
5912#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5913 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5914 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5915 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5916
5917#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5918 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5919 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5920 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5921
5922#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5923 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5924 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5925 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5926
5927#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5928 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5929 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5930 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5931
5932#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5933 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5934 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5935 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5936
5937
5938/* 16-bit flat: */
5939#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5940 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5941 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5942 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5943
5944#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5945 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5946 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5947 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5948
5949#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5950 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5951 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5952 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5953
5954#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5955 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5956 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5957 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5958
5959#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5960 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5961 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5962 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5963
5964#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5965 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5966 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5967 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5968
5969/* 32-bit flat: */
5970#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5971 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5972 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5973 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5974
5975#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5976 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5977 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5978 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5979
5980#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5981 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5982 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5983 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5984
5985#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5986 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5987 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5988 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5989
5990#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
5991 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
5992 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5993                                               (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5994
5995#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
5996 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
5997 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5998 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5999
6000#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
6001 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
6002 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6003 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6004
6005#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
6006 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
6007 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6008 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6009
6010
6011/* 64-bit flat: */
6012#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
6013 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6014 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6015 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6016
6017#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
6018 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
6019 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6020 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6021
6022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6023/* 128-bit segmented: */
6024#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
6025 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6026 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6027 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
6028
6029#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
6030 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6031 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6032 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6033
6034AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
6035#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
6036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
6037 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6038 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6039
6040#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
6041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6042 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6043 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
6044
6045/* 128-bit flat: */
6046#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
6047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6048 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6049 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
6050
6051#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
6052 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6053 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6054 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6055
6056#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
6057 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
6058 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6059 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6060
6061#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
6062 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6063 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6064 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
6065
6066/* 256-bit segmented: */
6067#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
6068 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6069 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6070 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6071
6072#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
6073 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6074 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6075 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6076
6077#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
6078 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6079 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6080 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6081
6082
6083/* 256-bit flat: */
6084#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
6085 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6086 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6087 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6088
6089#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
6090 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6091 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6092 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6093
6094#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
6095 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6096 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6097 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6098#endif
6099
6100
6101/*********************************************************************************************************************************
6102* Memory stores (IEM_MEM_STORE_XXX). *
6103*********************************************************************************************************************************/
6104
6105#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
6106 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
6107 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6108 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6109
6110#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
6111 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
6112 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6113 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6114
6115#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
6116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
6117 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6118 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6119
6120#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
6121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
6122 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6123 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6124
6125
6126#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
6127 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
6128 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6129 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6130
6131#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
6132 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
6133 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6134 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6135
6136#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
6137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
6138 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6139 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6140
6141#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
6142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
6143 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6144 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6145
6146
6147#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
6148 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6149 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6150
6151#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
6152 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6153 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6154
6155#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
6156 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6157 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6158
6159#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
6160 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6161 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6162
6163
6164#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
6165 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6166 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6167
6168#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
6169 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6170 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6171
6172#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
6173 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6174 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6175
6176#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
6177 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6178 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6179
6180/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6181 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6182DECL_INLINE_THROW(uint32_t)
6183iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6184 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6185{
6186 /*
6187 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6188 * to do the grunt work.
6189 */
6190 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6191 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6192 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6193 pfnFunction, idxInstr);
6194 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6195 return off;
6196}
6197
6198
6199#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6200# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6201 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6202 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6203 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6204
6205# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6206 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6207 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6208 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6209
6210# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6211 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6212 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6213 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6214
6215# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6217 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6218 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6219
6220
6221# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6222 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6223 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6224 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6225
6226# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6227 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6228 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6229 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6230
6231# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6233 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6234 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6235
6236# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6237 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6238 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6239 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6240#endif
6241
6242
6243
6244/*********************************************************************************************************************************
6245* Stack Accesses. *
6246*********************************************************************************************************************************/
6247/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
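/* The first byte is the width of the value being pushed (16/32/64 bits), the second the stack-pointer
   width used by the flat variants (0 = not flat, i.e. an SS-relative push), the third a flag selecting
   the dedicated segment-register push helpers (iemNativeHlpStack*StoreU32SReg), and the last byte is
   unused. */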
6248#define IEM_MC_PUSH_U16(a_u16Value) \
6249 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6250 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6251#define IEM_MC_PUSH_U32(a_u32Value) \
6252 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6253 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6254#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6255 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6256 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6257#define IEM_MC_PUSH_U64(a_u64Value) \
6258 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6259 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6260
6261#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6262 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6263 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6264#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6265 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6266 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6267#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6268 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6269 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6270
6271#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6272 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6273 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6274#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6275 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6276 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
6277
6278
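/**
 * Emits the 16-bit SP variant of the push stack-pointer update.
 *
 * Net effect on the guest state (both host flavours):
 *      SP          = (SP - cbMem) & 0xffff;    // bits 63:16 of RSP are left untouched
 *      idxRegEffSp = (uint32_t)SP;             // zero-extended effective stack address
 */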
6279DECL_FORCE_INLINE_THROW(uint32_t)
6280iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6281{
6282 /* Use16BitSp: */
6283#ifdef RT_ARCH_AMD64
6284 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6285 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6286#else
6287 /* sub regeff, regrsp, #cbMem */
6288 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6289 /* and regeff, regeff, #0xffff */
6290 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6291 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6292 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
6293 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6294#endif
6295 return off;
6296}
6297
6298
6299DECL_FORCE_INLINE(uint32_t)
6300iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6301{
6302 /* Use32BitSp: */
6303 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6304 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6305 return off;
6306}
6307
6308
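/*
 * Rough logical flow of the code generated by iemNativeEmitStackPush (sketch
 * only; the out-of-line pieces are emitted in a different order):
 *      1. Compute the new [R|E]SP and the effective stack address, branching
 *         on SS.attr.D when the stack isn't flat.
 *      2. Inline TLB lookup; on a hit the value is stored directly through the
 *         translated host address (incl. the Intel real-mode sreg quirk).
 *      3. On a miss the pfnFunction helper is called to do the store.
 *      4. The updated RSP is committed and the temporaries are freed.
 */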
6309/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6310DECL_INLINE_THROW(uint32_t)
6311iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6312 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6313{
6314 /*
6315 * Assert sanity.
6316 */
6317 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6318 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6319#ifdef VBOX_STRICT
6320 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6321 {
6322 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6323 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6324 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6325 Assert( pfnFunction
6326 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6327 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6328 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6329 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6330 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6331 : UINT64_C(0xc000b000a0009000) ));
6332 }
6333 else
6334 Assert( pfnFunction
6335 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6336 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6337 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6338 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6339 : UINT64_C(0xc000b000a0009000) ));
6340#endif
6341
6342#ifdef VBOX_STRICT
6343 /*
6344 * Check that the fExec flags we've got make sense.
6345 */
6346 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6347#endif
6348
6349 /*
6350 * To keep things simple we have to commit any pending writes first as we
6351 * may end up making calls.
6352 */
6353 /** @todo we could postpone this till we make the call and reload the
6354 * registers after returning from the call. Not sure if that's sensible or
6355 * not, though. */
6356 off = iemNativeRegFlushPendingWrites(pReNative, off);
6357
6358 /*
6359 * First we calculate the new RSP and the effective stack pointer value.
6360 * For 64-bit mode and flat 32-bit these two are the same.
6361 * (Code structure is very similar to that of PUSH)
6362 */
6363 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6364 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6365 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
6366 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6367 ? cbMem : sizeof(uint16_t);
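    /* I.e. Intel-style segment pushes outside 16-bit code only access a word
       (the upper bytes of the stack slot are left alone), while in 16-bit code
       the full operand size is accessed so the real-mode EFLAGS quirk can be
       emulated in the TLB-hit store path below. */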
6368 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6369 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6370 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6371 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6372 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6373 if (cBitsFlat != 0)
6374 {
6375 Assert(idxRegEffSp == idxRegRsp);
6376 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6377 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6378 if (cBitsFlat == 64)
6379 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6380 else
6381 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6382 }
6383 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6384 {
6385 Assert(idxRegEffSp != idxRegRsp);
6386 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6387 kIemNativeGstRegUse_ReadOnly);
6388#ifdef RT_ARCH_AMD64
6389 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6390#else
6391 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6392#endif
6393 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6394 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6395 offFixupJumpToUseOtherBitSp = off;
6396 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6397 {
6398 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6399 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6400 }
6401 else
6402 {
6403 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6404 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6405 }
6406 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6407 }
6408 /* SpUpdateEnd: */
6409 uint32_t const offLabelSpUpdateEnd = off;
6410
6411 /*
6412 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6413 * we're skipping lookup).
6414 */
6415 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6416 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6417 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6418 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6419 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6420 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6421 : UINT32_MAX;
6422 uint8_t const idxRegValue = !TlbState.fSkip
6423 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6424 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6425 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6426 : UINT8_MAX;
6427 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6428
6429
6430 if (!TlbState.fSkip)
6431 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6432 else
6433 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6434
6435 /*
6436 * Use16BitSp:
6437 */
6438 if (cBitsFlat == 0)
6439 {
6440#ifdef RT_ARCH_AMD64
6441 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6442#else
6443 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6444#endif
6445 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6446 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6447 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6448 else
6449 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6450 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6451 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6452 }
6453
6454 /*
6455 * TlbMiss:
6456 *
6457 * Call helper to do the pushing.
6458 */
6459 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6460
6461#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6462 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6463#else
6464 RT_NOREF(idxInstr);
6465#endif
6466
6467 /* Save variables in volatile registers. */
6468 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6469 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6470 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6471 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6472 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6473
6474 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6475 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6476 {
6477 /* Swap them using ARG0 as temp register: */
6478 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6479 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6480 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6481 }
6482 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6483 {
6484 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6485 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6486 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6487
6488 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6489 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6490 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6491 }
6492 else
6493 {
6494 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6495 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6496
6497 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6498 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6499 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6500 }
6501
6502 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6503 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6504
6505 /* Done setting up parameters, make the call. */
6506 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6507
6508 /* Restore variables and guest shadow registers to volatile registers. */
6509 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6510 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6511
6512#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6513 if (!TlbState.fSkip)
6514 {
6515 /* end of TlbMiss - Jump to the done label. */
6516 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6517 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6518
6519 /*
6520 * TlbLookup:
6521 */
6522 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6523 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6524
6525 /*
6526 * Emit code to do the actual storing / fetching.
6527 */
6528 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6529# ifdef VBOX_WITH_STATISTICS
6530 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6531 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6532# endif
6533 if (idxRegValue != UINT8_MAX)
6534 {
6535 switch (cbMemAccess)
6536 {
6537 case 2:
6538 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6539 break;
6540 case 4:
6541 if (!fIsIntelSeg)
6542 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6543 else
6544 {
6545                     /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
6546                        PUSH FS in real mode, so we have to try to emulate that here.
6547 We borrow the now unused idxReg1 from the TLB lookup code here. */
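                     /* The dword stored thus ends up with the selector value in the
                        low word and (EFLAGS & 0xffff0000 & ~X86_EFL_RAZ_MASK) in the
                        high word; the OR below relies on the upper half of
                        idxRegValue being zero (see the ASSUMES note further down). */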
6548 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6549 kIemNativeGstReg_EFlags);
6550 if (idxRegEfl != UINT8_MAX)
6551 {
6552#ifdef RT_ARCH_AMD64
6553 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6554 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6555 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6556#else
6557 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6558 off, TlbState.idxReg1, idxRegEfl,
6559 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6560#endif
6561 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6562 }
6563 else
6564 {
6565 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6566 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6567 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6568 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6569 }
6570 /* ASSUMES the upper half of idxRegValue is ZERO. */
6571 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6572 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6573 }
6574 break;
6575 case 8:
6576 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6577 break;
6578 default:
6579 AssertFailed();
6580 }
6581 }
6582 else
6583 {
6584 switch (cbMemAccess)
6585 {
6586 case 2:
6587 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6588 idxRegMemResult, TlbState.idxReg1);
6589 break;
6590 case 4:
6591 Assert(!fIsSegReg);
6592 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6593 idxRegMemResult, TlbState.idxReg1);
6594 break;
6595 case 8:
6596 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6597 break;
6598 default:
6599 AssertFailed();
6600 }
6601 }
6602
6603 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6604 TlbState.freeRegsAndReleaseVars(pReNative);
6605
6606 /*
6607 * TlbDone:
6608 *
6609 * Commit the new RSP value.
6610 */
6611 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6612 }
6613#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6614
6615#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6616 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6617#endif
6618 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6619 if (idxRegEffSp != idxRegRsp)
6620 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6621
6622    /* The value variable is implicitly flushed. */
6623 if (idxRegValue != UINT8_MAX)
6624 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6625 iemNativeVarFreeLocal(pReNative, idxVarValue);
6626
6627 return off;
6628}
6629
6630
6631
6632/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
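/* Same packing as for the push macros above; the fSReg byte is always zero
   for these pops. */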
6633#define IEM_MC_POP_GREG_U16(a_iGReg) \
6634 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6635 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6636#define IEM_MC_POP_GREG_U32(a_iGReg) \
6637 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6638 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6639#define IEM_MC_POP_GREG_U64(a_iGReg) \
6640 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6641 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6642
6643#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6644 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6645 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6646#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6647 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6648 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6649
6650#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6651 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6652 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6653#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6654 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6655 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
6656
6657
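/**
 * Emits the 16-bit SP variant of the pop stack-pointer update.
 *
 * Net effect on the guest state (both host flavours):
 *      idxRegEffSp = (uint32_t)SP;             // the address to read from, zero-extended
 *      SP          = (SP + cbMem) & 0xffff;    // bits 63:16 of RSP are left untouched
 * On arm64, idxRegTmp is used as scratch for the intermediate sum.
 */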
6658DECL_FORCE_INLINE_THROW(uint32_t)
6659iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6660 uint8_t idxRegTmp)
6661{
6662 /* Use16BitSp: */
6663#ifdef RT_ARCH_AMD64
6664 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6665 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6666 RT_NOREF(idxRegTmp);
6667#else
6668 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6669 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6670 /* add tmp, regrsp, #cbMem */
6671 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6672 /* and tmp, tmp, #0xffff */
6673 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6674 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6675 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6676 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6677#endif
6678 return off;
6679}
6680
6681
6682DECL_FORCE_INLINE(uint32_t)
6683iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6684{
6685 /* Use32BitSp: */
6686 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6687 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6688 return off;
6689}
6690
6691
6692/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6693DECL_INLINE_THROW(uint32_t)
6694iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6695 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6696{
6697 /*
6698 * Assert sanity.
6699 */
6700 Assert(idxGReg < 16);
6701#ifdef VBOX_STRICT
6702 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6703 {
6704 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6705 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6706 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6707 Assert( pfnFunction
6708 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6709 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6710 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6711 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6712 : UINT64_C(0xc000b000a0009000) ));
6713 }
6714 else
6715 Assert( pfnFunction
6716 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6717 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6718 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6719 : UINT64_C(0xc000b000a0009000) ));
6720#endif
6721
6722#ifdef VBOX_STRICT
6723 /*
6724 * Check that the fExec flags we've got make sense.
6725 */
6726 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6727#endif
6728
6729 /*
6730 * To keep things simple we have to commit any pending writes first as we
6731 * may end up making calls.
6732 */
6733 off = iemNativeRegFlushPendingWrites(pReNative, off);
6734
6735 /*
6736 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6737 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6738 * directly as the effective stack pointer.
6739 * (Code structure is very similar to that of PUSH)
6740 */
6741 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6742 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6743 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6744 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6745 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6746 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6747 * will be the resulting register value. */
6748 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6749
6750 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6751 if (cBitsFlat != 0)
6752 {
6753 Assert(idxRegEffSp == idxRegRsp);
6754 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6755 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6756 }
6757 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6758 {
6759 Assert(idxRegEffSp != idxRegRsp);
6760 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6761 kIemNativeGstRegUse_ReadOnly);
6762#ifdef RT_ARCH_AMD64
6763 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6764#else
6765 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6766#endif
6767 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6768 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6769 offFixupJumpToUseOtherBitSp = off;
6770 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6771 {
6772/** @todo can skip idxRegRsp updating when popping ESP. */
6773 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6774 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6775 }
6776 else
6777 {
6778 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6779 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6780 }
6781 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6782 }
6783 /* SpUpdateEnd: */
6784 uint32_t const offLabelSpUpdateEnd = off;
6785
6786 /*
6787 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6788 * we're skipping lookup).
6789 */
6790 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6791 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6792 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6793 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6794 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6795 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6796 : UINT32_MAX;
6797
6798 if (!TlbState.fSkip)
6799 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6800 else
6801 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6802
6803 /*
6804 * Use16BitSp:
6805 */
6806 if (cBitsFlat == 0)
6807 {
6808#ifdef RT_ARCH_AMD64
6809 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6810#else
6811 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6812#endif
6813 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6814 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6815 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6816 else
6817 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6818 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6820 }
6821
6822 /*
6823 * TlbMiss:
6824 *
6825     * Call helper to do the popping.
6826 */
6827 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6828
6829#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6830 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6831#else
6832 RT_NOREF(idxInstr);
6833#endif
6834
6835 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6836 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6837 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6838 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6839
6840
6841 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6842 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6843 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6844
6845 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6846 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6847
6848 /* Done setting up parameters, make the call. */
6849 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6850
6851 /* Move the return register content to idxRegMemResult. */
6852 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6853 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6854
6855 /* Restore variables and guest shadow registers to volatile registers. */
6856 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6857 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6858
6859#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6860 if (!TlbState.fSkip)
6861 {
6862 /* end of TlbMiss - Jump to the done label. */
6863 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6864 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6865
6866 /*
6867 * TlbLookup:
6868 */
6869 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6870 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6871
6872 /*
6873     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
6874 */
6875 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6876# ifdef VBOX_WITH_STATISTICS
6877 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6878 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6879# endif
6880 switch (cbMem)
6881 {
6882 case 2:
6883 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6884 break;
6885 case 4:
6886 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6887 break;
6888 case 8:
6889 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6890 break;
6891 default:
6892 AssertFailed();
6893 }
6894
6895 TlbState.freeRegsAndReleaseVars(pReNative);
6896
6897 /*
6898 * TlbDone:
6899 *
6900     * Set the new RSP value (FLAT accesses need to calculate it first) and
6901 * commit the popped register value.
6902 */
6903 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6904 }
6905#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6906
6907 if (idxGReg != X86_GREG_xSP)
6908 {
6909 /* Set the register. */
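        /* Note: a 32-bit or 64-bit pop replaces the whole GPR, so the result
           register can simply take over as the guest register's shadow copy;
           a 16-bit pop only merges into bits 15:0 and therefore needs the
           current register value (the else branch below). */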
6910 if (cbMem >= sizeof(uint32_t))
6911 {
6912#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6913 AssertMsg( pReNative->idxCurCall == 0
6914 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6915 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6916#endif
6917 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6918#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6919 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6920#endif
6921#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6922 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6923 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6924#endif
6925 }
6926 else
6927 {
6928 Assert(cbMem == sizeof(uint16_t));
6929 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6930 kIemNativeGstRegUse_ForUpdate);
6931 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6932#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6933 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6934#endif
6935 iemNativeRegFreeTmp(pReNative, idxRegDst);
6936 }
6937
6938 /* Complete RSP calculation for FLAT mode. */
6939 if (idxRegEffSp == idxRegRsp)
6940 {
6941 if (cBitsFlat == 64)
6942 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6943 else
6944 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6945 }
6946 }
6947 else
6948 {
6949    /* We're popping RSP, ESP or SP. Only SP needs a bit of extra work, of course. */
6950 if (cbMem == sizeof(uint64_t))
6951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6952 else if (cbMem == sizeof(uint32_t))
6953 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6954 else
6955 {
6956 if (idxRegEffSp == idxRegRsp)
6957 {
6958 if (cBitsFlat == 64)
6959 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6960 else
6961 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6962 }
6963 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6964 }
6965 }
6966
6967#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6968 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6969#endif
6970
6971 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6972 if (idxRegEffSp != idxRegRsp)
6973 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6974 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6975
6976 return off;
6977}
6978
6979
6980
6981/*********************************************************************************************************************************
6982* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6983*********************************************************************************************************************************/
6984
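/* Each mapping macro below forwards to iemNativeEmitMemMapCommon() with the
   element size, an IEM_ACCESS_DATA_XXX mode, an alignment mask (natural
   alignment, i.e. size - 1; 0 for bytes) and the matching TLB-miss helper.
   The FLAT variants pass UINT8_MAX instead of a segment register index. */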
6985#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6986 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6987 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6988 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6989
6990#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6991 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6992 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6993 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6994
6995#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6996 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6997 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6998 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6999
7000#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7001 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7002 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7003 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
7004
7005
7006#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7007 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7008 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7009 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
7010
7011#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7012 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7013 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7014 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
7015
7016#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7017 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7018 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7019 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7020
7021#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7022 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7023 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7024 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
7025
7026#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7027 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
7028 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7029 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7030
7031
7032#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7033 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7034 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7035 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
7036
7037#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7038 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7039 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7040 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
7041
7042#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7043 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7044 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7045 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7046
7047#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7048 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7049 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7050 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
7051
7052#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7053 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
7054 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7055 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7056
7057
7058#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7059 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7060 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7061 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
7062
7063#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7065 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7066 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
7067#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7068 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7069 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7070 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7071
7072#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7073 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7074 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7075 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
7076
7077#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7078 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
7079 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7080 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7081
7082
7083#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7084 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7085 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7086 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
7087
7088#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7089 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7090 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7091 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
7092
7093
7094#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7095 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7096 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7097 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
7098
7099#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7100 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7101 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7102 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
7103
7104#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7105 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7106 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7107 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
7108
7109#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7110 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7111 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7112 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
7113
7114
7115
7116#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7117 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7118 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
7119 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
7120
7121#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7122 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7123 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7124 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
7125
7126#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7127 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7128 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7129 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
7130
7131#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7132 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7133 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7134 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
7135
7136
7137#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7138 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7139 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7140 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
7141
7142#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7143 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7144 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7145 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
7146
7147#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7148 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7149 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7150 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7151
7152#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7153 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7154 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7155 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
7156
7157#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
7158 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
7159 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7160 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7161
7162
7163#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7165 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7166 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
7167
7168#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7169 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7170 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7171 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
7172
7173#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7174 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7175 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7176 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7177
7178#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7179 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7180 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7181 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7182
7183#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7184 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7185 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7186 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7187
7188
7189#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7190 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7191 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7192 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7193
7194#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7195 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7196 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7197 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7198
7199#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7201 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7202 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7203
7204#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7205 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7206 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7207 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7208
7209#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7210 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7211 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7212 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7213
7214
7215#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7216 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7217 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7218 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7219
7220#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7222 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7223 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7224
7225
7226#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7227 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7228 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7229 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7230
7231#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7232 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7233 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7234 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7235
7236#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7237 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7238 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7239 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7240
7241#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7242 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7243 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7244 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
7245
7246
7247DECL_INLINE_THROW(uint32_t)
7248iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7249 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7250 uintptr_t pfnFunction, uint8_t idxInstr)
7251{
7252 /*
7253 * Assert sanity.
7254 */
7255 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7256 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7257 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7258 && pVarMem->cbVar == sizeof(void *),
7259 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7260
7261 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7263 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7264 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7265 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7266
7267 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7268 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7269 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7270 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7271 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7272
7273 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7274
7275 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
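    /* The TLB-miss helpers take up to four register arguments: pVCpu (ARG0),
       &bUnmapInfo (ARG1), GCPtrMem (ARG2) and, for the segmented variants,
       iSegReg (ARG3) - hence the requirement for at least four argument GPRs. */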
7276
7277#ifdef VBOX_STRICT
7278# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7279 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7280 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7281 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7282 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7283# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7284 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7285 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7286 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
7287
7288 if (iSegReg == UINT8_MAX)
7289 {
7290 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7291 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7292 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7293 switch (cbMem)
7294 {
7295 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7296 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7297 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7298 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7299 case 10:
7300 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7301 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7302 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7303 break;
7304 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7305# if 0
7306 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7307 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7308# endif
7309 default: AssertFailed(); break;
7310 }
7311 }
7312 else
7313 {
7314 Assert(iSegReg < 6);
7315 switch (cbMem)
7316 {
7317 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7318 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7319 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7320 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7321 case 10:
7322 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7323 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7324 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7325 break;
7326 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7327# if 0
7328 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7329 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7330# endif
7331 default: AssertFailed(); break;
7332 }
7333 }
7334# undef IEM_MAP_HLP_FN
7335# undef IEM_MAP_HLP_FN_NO_AT
7336#endif
7337
7338#ifdef VBOX_STRICT
7339 /*
7340 * Check that the fExec flags we've got make sense.
7341 */
7342 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7343#endif
7344
7345 /*
7346 * To keep things simple we have to commit any pending writes first as we
7347 * may end up making calls.
7348 */
7349 off = iemNativeRegFlushPendingWrites(pReNative, off);
7350
7351#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7352 /*
7353 * Move/spill/flush stuff out of call-volatile registers.
7354 * This is the easy way out. We could contain this to the tlb-miss branch
7355 * by saving and restoring active stuff here.
7356 */
7357 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7358 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7359#endif
7360
7361 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7362 while the tlb-miss codepath will temporarily put it on the stack.
7363       Set the type to stack here so we don't need to do it twice below. */
7364 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7365 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7366 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7367 * lookup is done. */
7368
7369 /*
7370 * Define labels and allocate the result register (trying for the return
7371 * register if we can).
7372 */
7373 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7374 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7375 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7376 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
7377 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7378 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7379 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7380 : UINT32_MAX;
7381//off=iemNativeEmitBrk(pReNative, off, 0);
7382 /*
7383 * Jump to the TLB lookup code.
7384 */
7385 if (!TlbState.fSkip)
7386 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7387
7388 /*
7389 * TlbMiss:
7390 *
7391 * Call helper to do the fetching.
7392 * We flush all guest register shadow copies here.
7393 */
7394 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7395
7396#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7397 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7398#else
7399 RT_NOREF(idxInstr);
7400#endif
7401
7402#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7403 /* Save variables in volatile registers. */
7404 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7405 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7406#endif
7407
7408 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7409 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
7410#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7411 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7412#else
7413 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7414#endif
7415
7416 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7417 if (iSegReg != UINT8_MAX)
7418 {
7419 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7420 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7421 }
7422
7423 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; pass the stack slot address, the result is loaded into a register after the call. */
7424 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7425 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7426
7427 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7428 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7429
7430 /* Done setting up parameters, make the call. */
7431 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7432
7433 /*
7434 * Put the output in the right registers.
7435 */
7436 Assert(idxRegMemResult == pVarMem->idxReg);
7437 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7438 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7439
7440#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7441 /* Restore variables and guest shadow registers to volatile registers. */
7442 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7443 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7444#endif
7445
7446 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7447 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7448
7449#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7450 if (!TlbState.fSkip)
7451 {
7452 /* end of tlbmiss - Jump to the done label. */
7453 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7454 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7455
7456 /*
7457 * TlbLookup:
7458 */
7459 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7460 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7461# ifdef VBOX_WITH_STATISTICS
7462 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7463 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7464# endif
7465
7466 /* [idxVarUnmapInfo] = 0; */
7467 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7468
7469 /*
7470 * TlbDone:
7471 */
7472 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7473
7474 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7475
7476# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7477 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7478 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7479# endif
7480 }
7481#else
7482 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7483#endif
7484
7485 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7486 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7487
7488 return off;
7489}
7490
7491
7492#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7493 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7494 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7495
7496#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7497 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7498 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7499
7500#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7501 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7502 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7503
7504#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7505 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7506 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7507
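/*
 * Each of the wrappers above pins the commit-and-unmap helper that matches its
 * access type; the strict-build switch in the emitter below cross-checks that
 * pairing against fAccess.
 *
 * Purely illustrative pairing (placeholder MC statement names and arguments, not
 * taken from this file): a microcode block maps memory into a local together with
 * a bUnmapInfo local, operates on the mapping, and then commits it with the
 * wrapper matching the original access type:
 *
 *     IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEff);   hypothetical map MC
 *     ... update *pu32Dst ...
 *     IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */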
7508DECL_INLINE_THROW(uint32_t)
7509iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7510 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7511{
7512 /*
7513 * Assert sanity.
7514 */
7515 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7516#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7517 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7518#endif
7519 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7520 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7521 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7522#ifdef VBOX_STRICT
7523 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7524 {
7525 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7526 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7527 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7528 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7529 case IEM_ACCESS_TYPE_WRITE:
7530 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7531 case IEM_ACCESS_TYPE_READ:
7532 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7533 default: AssertFailed();
7534 }
7535#else
7536 RT_NOREF(fAccess);
7537#endif
7538
7539 /*
7540 * To keep things simple we have to commit any pending writes first as we
7541 * may end up making calls (there shouldn't be any at this point, so this
7542 * is just for consistency).
7543 */
7544 /** @todo we could postpone this till we make the call and reload the
7545 * registers after returning from the call. Not sure if that's sensible or
7546 * not, though. */
7547 off = iemNativeRegFlushPendingWrites(pReNative, off);
7548
7549 /*
7550 * Move/spill/flush stuff out of call-volatile registers.
7551 *
7552 * We exclude any register holding the bUnmapInfo variable, as we'll be
7553 * checking it after returning from the call and will free it afterwards.
7554 */
7555 /** @todo save+restore active registers and maybe guest shadows in miss
7556 * scenario. */
7557 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7558 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7559
7560 /*
7561 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7562 * to call the unmap helper function.
7563 *
7564 * The likelihood of it being zero is higher than the TLB hit rate when doing
7565 * the mapping, as a TLB miss for a well aligned and unproblematic memory
7566 * access should also end up with a mapping that won't need special unmapping.
7567 */
7568 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7569 * should speed up things for the pure interpreter as well when TLBs
7570 * are enabled. */
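    /*
     * Sketch of the sequence emitted below (layout only):
     *
     *             test    bUnmapInfo, 0ffh        ; from its register or stack slot
     *             jz      NoUnmapNeeded
     *             ; load arguments and call the commit-and-unmap helper (pfnFunction)
     *     NoUnmapNeeded:
     */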
7571#ifdef RT_ARCH_AMD64
7572 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7573 {
7574 /* test byte [rbp - xxx], 0ffh */
7575 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7576 pbCodeBuf[off++] = 0xf6;
7577 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7578 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7579 pbCodeBuf[off++] = 0xff;
7580 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7581 }
7582 else
7583#endif
7584 {
7585 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7586 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7587 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7588 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7589 }
7590 uint32_t const offJmpFixup = off;
7591 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
7592
7593 /*
7594 * Call the unmap helper function.
7595 */
7596#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7597 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7598#else
7599 RT_NOREF(idxInstr);
7600#endif
7601
7602 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7603 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7604 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7605
7606 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7607 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7608
7609 /* Done setting up parameters, make the call. */
7610 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7611
7612 /* The bUnmapInfo variable is implicitly freed by these MCs. */
7613 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7614
7615 /*
7616 * Done, just fixup the jump for the non-call case.
7617 */
7618 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7619
7620 return off;
7621}
7622
7623
7624
7625/*********************************************************************************************************************************
7626* State and Exceptions *
7627*********************************************************************************************************************************/
7628
7629#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7630#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7631
7632#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7633#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7634#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7635
7636#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7637#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7638#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7639
7640
7641DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7642{
7643 /** @todo this needs a lot more work later. */
7644 RT_NOREF(pReNative, fForChange);
7645 return off;
7646}
7647
7648
7649
7650/*********************************************************************************************************************************
7651* Emitters for FPU related operations. *
7652*********************************************************************************************************************************/
7653
7654#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7655 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7656
7657/** Emits code for IEM_MC_FETCH_FCW. */
7658DECL_INLINE_THROW(uint32_t)
7659iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7660{
7661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7663
7664 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7665
7666 /* Allocate a temporary FCW register. */
7667 /** @todo eliminate extra register */
7668 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7669 kIemNativeGstRegUse_ReadOnly);
7670
7671 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7672
7673 /* Free but don't flush the FCW register. */
7674 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7675 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7676
7677 return off;
7678}
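/*
 * Illustrative use of the MC above in a microcode block (sketch with placeholder
 * surrounding statements only):
 *
 *     IEM_MC_LOCAL(uint16_t, u16Fcw);
 *     IEM_MC_FETCH_FCW(u16Fcw);
 *     ... store u16Fcw to its destination ...
 */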
7679
7680
7681#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7682 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7683
7684/** Emits code for IEM_MC_FETCH_FSW. */
7685DECL_INLINE_THROW(uint32_t)
7686iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7687{
7688 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7689 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7690
7691 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7692 /* Allocate a temporary FSW register. */
7693 /** @todo eliminate extra register */
7694 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7695 kIemNativeGstRegUse_ReadOnly);
7696
7697 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7698
7699 /* Free but don't flush the FSW register. */
7700 iemNativeRegFreeTmp(pReNative, idxFswReg);
7701 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7702
7703 return off;
7704}
7705
7706
7707
7708#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7709
7710
7711/*********************************************************************************************************************************
7712* Emitters for SSE/AVX specific operations. *
7713*********************************************************************************************************************************/
7714
7715#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7716 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7717
7718/** Emits code for IEM_MC_COPY_XREG_U128. */
7719DECL_INLINE_THROW(uint32_t)
7720iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7721{
7722 /* This is a nop if the source and destination register are the same. */
7723 if (iXRegDst != iXRegSrc)
7724 {
7725 /* Allocate destination and source register. */
7726 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7727 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7728 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7729 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7730
7731 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7732
7733 /* Free but don't flush the source and destination register. */
7734 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7735 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7736 }
7737
7738 return off;
7739}
7740
7741
7742#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7743 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7744
7745/** Emits code for IEM_MC_FETCH_XREG_U128. */
7746DECL_INLINE_THROW(uint32_t)
7747iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7748{
7749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7750 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7751
7752 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7753 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7754
7755 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7756
7757 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7758
7759 /* Free but don't flush the source register. */
7760 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7761 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7762
7763 return off;
7764}
7765
7766
7767#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7768 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7769
7770#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
7771 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
7772
7773/** Emits code for IEM_MC_FETCH_XREG_U64 and IEM_MC_FETCH_XREG_R64. */
7774DECL_INLINE_THROW(uint32_t)
7775iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7776{
7777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7778 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7779
7780 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7781 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7782
7783 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7784 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7785
7786 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7787
7788 /* Free but don't flush the source register. */
7789 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7790 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7791
7792 return off;
7793}
7794
7795
7796#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7797 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7798
7799#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
7800 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
7801
7802/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
7803DECL_INLINE_THROW(uint32_t)
7804iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7805{
7806 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7807 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7808
7809 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7810 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7811
7812 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7813 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7814
7815 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7816
7817 /* Free but don't flush the source register. */
7818 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7819 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7820
7821 return off;
7822}
7823
7824
7825#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
7826 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
7827
7828/** Emits code for IEM_MC_FETCH_XREG_U16. */
7829DECL_INLINE_THROW(uint32_t)
7830iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7831{
7832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7833 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7834
7835 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7836 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7837
7838 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7839 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7840
7841 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7842
7843 /* Free but don't flush the source register. */
7844 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7845 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7846
7847 return off;
7848}
7849
7850
7851#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
7852 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
7853
7854/** Emits code for IEM_MC_FETCH_XREG_U8. */
7855DECL_INLINE_THROW(uint32_t)
7856iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7857{
7858 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7859 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7860
7861 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7862 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7863
7864 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7866
7867 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7868
7869 /* Free but don't flush the source register. */
7870 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7871 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7872
7873 return off;
7874}
7875
7876
7877#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7878 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7879
7880AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7881#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7882 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7883
7884
7885/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7886DECL_INLINE_THROW(uint32_t)
7887iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7888{
7889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7891
7892 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7893 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7894 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7895
7896 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7897
7898 /* Free but don't flush the source register. */
7899 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7900 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7901
7902 return off;
7903}
7904
7905
7906#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7907 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
7908
7909#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7910 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
7911
7912#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
7913 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
7914
7915#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
7916 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
7917
7918#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
7919 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
7920
7921#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
7922 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
7923
7924/** Emits code for IEM_MC_STORE_XREG_U64/U32/U16/U8 and IEM_MC_STORE_XREG_R64/R32. */
7925DECL_INLINE_THROW(uint32_t)
7926iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
7927 uint8_t cbLocal, uint8_t iElem)
7928{
7929 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7930 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
7931
7932#ifdef VBOX_STRICT
7933 switch (cbLocal)
7934 {
7935 case sizeof(uint64_t): Assert(iElem < 2); break;
7936 case sizeof(uint32_t): Assert(iElem < 4); break;
7937 case sizeof(uint16_t): Assert(iElem < 8); break;
7938 case sizeof(uint8_t): Assert(iElem < 16); break;
7939 default: AssertFailed();
7940 }
7941#endif
7942
7943 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7944 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7945 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7946
7947 switch (cbLocal)
7948 {
7949 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7950 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7951 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7952 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7953 default: AssertFailed();
7954 }
7955
7956 /* Free but don't flush the source register. */
7957 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7958 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7959
7960 return off;
7961}
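/*
 * For illustration: IEM_MC_STORE_XREG_U32(3, 1, u32Value) reaches this emitter with
 * cbLocal=sizeof(uint32_t) and iElem=1, so only dword 1 of XMM3 is written; the
 * ForUpdate allocation above preserves the remaining elements.
 */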
7962
7963
7964#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7965 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7966
7967/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7968DECL_INLINE_THROW(uint32_t)
7969iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7970{
7971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7972 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7973
7974 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7975 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7976 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7977
7978 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
7979 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7980 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7981
7982 /* Free but don't flush the source register. */
7983 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7984 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7985
7986 return off;
7987}
7988
7989
7990#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7991 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7992
7993/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7994DECL_INLINE_THROW(uint32_t)
7995iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7996{
7997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7998 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7999
8000 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8001 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8002 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8003
8004 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
8005 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
8006 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
8007
8008 /* Free but don't flush the source register. */
8009 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8010 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8011
8012 return off;
8013}
8014
8015
8016#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
8017 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
8018
8019/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
8020DECL_INLINE_THROW(uint32_t)
8021iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
8022 uint8_t idxSrcVar, uint8_t iDwSrc)
8023{
8024 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8025 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8026
8027 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8028 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8029 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8030
8031 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
8032 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
8033
8034 /* Free but don't flush the destination register. */
8035 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8036 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8037
8038 return off;
8039}
8040
8041
8042#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8043 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8044
8045/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
8046DECL_INLINE_THROW(uint32_t)
8047iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8048{
8049 /*
8050 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
8051 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
8052 * allocated host register for iYRegDst containing garbage. This will be caught by the guest register value checking in debug builds.
8053 */
8054 if (iYRegDst != iYRegSrc)
8055 {
8056 /* Allocate destination and source register. */
8057 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8058 kIemNativeGstSimdRegLdStSz_256,
8059 kIemNativeGstRegUse_ForFullWrite);
8060 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8061 kIemNativeGstSimdRegLdStSz_Low128,
8062 kIemNativeGstRegUse_ReadOnly);
8063
8064 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8065 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8066
8067 /* Free but don't flush the source and destination register. */
8068 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8069 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8070 }
8071 else
8072 {
8073 /* This effectively only clears the upper 128-bits of the register. */
8074 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8075 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8076
8077 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8078
8079 /* Free but don't flush the destination register. */
8080 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8081 }
8082
8083 return off;
8084}
8085
8086
8087#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8088 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8089
8090/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
8091DECL_INLINE_THROW(uint32_t)
8092iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8093{
8094 /*
8095 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
8096 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
8097 * allocated host register for iYRegDst containing garbage. This will be caught by the guest register value checking in debug builds.
8098 * The iYRegSrc == iYRegDst case would effectively only clear the upper 256 bits of a zmm register, which we don't support yet, so this is just a nop.
8099 */
8100 if (iYRegDst != iYRegSrc)
8101 {
8102 /* Allocate destination and source register. */
8103 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8104 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8105 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8106 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8107
8108 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8109
8110 /* Free but don't flush the source and destination register. */
8111 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8112 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8113 }
8114
8115 return off;
8116}
8117
8118
8119#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
8120 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
8121
8122/** Emits code for IEM_MC_FETCH_YREG_U128. */
8123DECL_INLINE_THROW(uint32_t)
8124iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
8125{
8126 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8127 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8128
8129 Assert(iDQWord <= 1);
8130 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8131 iDQWord == 1
8132 ? kIemNativeGstSimdRegLdStSz_High128
8133 : kIemNativeGstSimdRegLdStSz_Low128,
8134 kIemNativeGstRegUse_ReadOnly);
8135
8136 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8137 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8138
8139 if (iDQWord == 1)
8140 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8141 else
8142 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8143
8144 /* Free but don't flush the source register. */
8145 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8146 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8147
8148 return off;
8149}
8150
8151
8152#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
8153 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
8154
8155/** Emits code for IEM_MC_FETCH_YREG_U64. */
8156DECL_INLINE_THROW(uint32_t)
8157iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
8158{
8159 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8160 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8161
8162 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8163 iQWord >= 2
8164 ? kIemNativeGstSimdRegLdStSz_High128
8165 : kIemNativeGstSimdRegLdStSz_Low128,
8166 kIemNativeGstRegUse_ReadOnly);
8167
8168 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8169 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8170
8171 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8172
8173 /* Free but don't flush the source register. */
8174 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8175 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8176
8177 return off;
8178}
8179
8180
8181#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
8182 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
8183
8184/** Emits code for IEM_MC_FETCH_YREG_U32. */
8185DECL_INLINE_THROW(uint32_t)
8186iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
8187{
8188 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8189 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8190
8191 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8192 iDWord >= 4
8193 ? kIemNativeGstSimdRegLdStSz_High128
8194 : kIemNativeGstSimdRegLdStSz_Low128,
8195 kIemNativeGstRegUse_ReadOnly);
8196
8197 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8198 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8199
8200 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8201
8202 /* Free but don't flush the source register. */
8203 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8204 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8205
8206 return off;
8207}
8208
8209
8210#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8211 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8212
8213/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8214DECL_INLINE_THROW(uint32_t)
8215iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8216{
8217 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8218 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8219
8220 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8221
8222 /* Free but don't flush the register. */
8223 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8224
8225 return off;
8226}
8227
8228
8229#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8230 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8231
8232/** Emits code for IEM_MC_STORE_YREG_U128. */
8233DECL_INLINE_THROW(uint32_t)
8234iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8235{
8236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8237 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8238
8239 Assert(iDQword <= 1);
8240 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8241 iDQword == 0
8242 ? kIemNativeGstSimdRegLdStSz_Low128
8243 : kIemNativeGstSimdRegLdStSz_High128,
8244 kIemNativeGstRegUse_ForFullWrite);
8245
8246 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8247
8248 if (iDQword == 0)
8249 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8250 else
8251 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8252
8253 /* Free but don't flush the source register. */
8254 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8255 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8256
8257 return off;
8258}
8259
8260
8261#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8262 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8263
8264/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8265DECL_INLINE_THROW(uint32_t)
8266iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8267{
8268 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8269 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8270
8271 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8272 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8273
8274 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8275
8276 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8277 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8278
8279 /* Free but don't flush the source register. */
8280 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8281 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8282
8283 return off;
8284}
8285
8286
8287#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8288 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8289
8290/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8291DECL_INLINE_THROW(uint32_t)
8292iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8293{
8294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8295 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8296
8297 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8298 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8299
8300 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8301
8302 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8303 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8304
8305 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8306 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8307
8308 return off;
8309}
8310
8311
8312#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8313 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8314
8315/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8316DECL_INLINE_THROW(uint32_t)
8317iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8318{
8319 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8320 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8321
8322 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8323 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8324
8325 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8326
8327 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8328 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8329
8330 /* Free but don't flush the source register. */
8331 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8332 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8333
8334 return off;
8335}
8336
8337
8338#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8339 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8340
8341/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8342DECL_INLINE_THROW(uint32_t)
8343iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8344{
8345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8346 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8347
8348 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8349 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8350
8351 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8352
8353 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8354 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8355
8356 /* Free but don't flush the source register. */
8357 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8358 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8359
8360 return off;
8361}
8362
8363
8364#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8365 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8366
8367/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8368DECL_INLINE_THROW(uint32_t)
8369iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8370{
8371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8372 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8373
8374 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8375 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8376
8377 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8378
8379 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8380 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8381
8382 /* Free but don't flush the source register. */
8383 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8384 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8385
8386 return off;
8387}
8388
8389
8390#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8391 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8392
8393/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8394DECL_INLINE_THROW(uint32_t)
8395iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8396{
8397 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8398 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8399
8400 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8401 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8402
8403 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8404
8405 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8406
8407 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8408 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8409
8410 return off;
8411}
8412
8413
8414#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8415 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8416
8417/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8418DECL_INLINE_THROW(uint32_t)
8419iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8420{
8421 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8422 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8423
8424 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8425 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8426
8427 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8428
8429 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8430
8431 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8432 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8433
8434 return off;
8435}
8436
8437
8438#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8439 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8440
8441/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8442DECL_INLINE_THROW(uint32_t)
8443iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8444{
8445 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8446 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8447
8448 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8449 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8450
8451 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8452
8453 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8454
8455 /* Free but don't flush the source register. */
8456 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8457 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8458
8459 return off;
8460}
8461
8462
8463#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8464 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8465
8466/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8467DECL_INLINE_THROW(uint32_t)
8468iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8469{
8470 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8471 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8472
8473 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8474 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8475
8476 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8477
8478 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8479
8480 /* Free but don't flush the source register. */
8481 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8482 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8483
8484 return off;
8485}
8486
8487
8488#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8489 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8490
8491/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8492DECL_INLINE_THROW(uint32_t)
8493iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8494{
8495 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8496 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8497
8498 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8499 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8500
8501 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8502
8503 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8504
8505 /* Free but don't flush the source register. */
8506 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8507 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8508
8509 return off;
8510}
8511
8512
8513#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8514 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8515
8516/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8517DECL_INLINE_THROW(uint32_t)
8518iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8519{
8520 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8521 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8522
8523 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8524 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8525
8526 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8527
8528 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8529 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8530
8531 /* Free but don't flush the source register. */
8532 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8533 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8534
8535 return off;
8536}
8537
8538
8539#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8540 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8541
8542/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8543DECL_INLINE_THROW(uint32_t)
8544iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8545{
8546 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8547 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8548
8549 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8550 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8551
8552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8553
8554 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8555 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8556
8557 /* Free but don't flush the source register. */
8558 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8559 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8560
8561 return off;
8562}
8563
8564
8565#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8566 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8567
8568/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8569DECL_INLINE_THROW(uint32_t)
8570iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8571{
8572 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8573 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8574
8575 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8576 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8577 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8578 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8579 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8580
8581 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8582 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8583 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8584
8585 /* Free but don't flush the source and destination registers. */
8586 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8587 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8588 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8589
8590 return off;
8591}
8592
8593
8594#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8595 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8596
8597/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8598DECL_INLINE_THROW(uint32_t)
8599iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8600{
8601 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8602 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8603
8604 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8605 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8606 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8607 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8608 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8609
8610 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8611 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8612 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8613
8614 /* Free but don't flush the source and destination registers. */
8615 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8616 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8617 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8618
8619 return off;
8620}
8621
8622
8623#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
8624 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
8625
8626
8627/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
8628DECL_INLINE_THROW(uint32_t)
8629iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
8630{
8631 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8632 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8633
8634 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
8635 if (bImm8Mask & RT_BIT(0))
8636 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
8637 if (bImm8Mask & RT_BIT(1))
8638 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
8639 if (bImm8Mask & RT_BIT(2))
8640 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
8641 if (bImm8Mask & RT_BIT(3))
8642 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
8643
8644 /* Free but don't flush the destination register. */
8645 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8646
8647 return off;
8648}
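
/*
 * Summarised, each set bit 0..3 in the immediate mask zeroes the corresponding
 * dword of the XMM register.  Illustrative sketch only (assumes the RTUINT128U
 * layout; iemSketchClearXregU32Mask is not a real helper):
 *
 * @code
 * static void iemSketchClearXregU32Mask(RTUINT128U *puXReg, uint8_t bImm8Mask)
 * {
 *     for (unsigned iDWord = 0; iDWord < 4; iDWord++)
 *         if (bImm8Mask & RT_BIT(iDWord))
 *             puXReg->au32[iDWord] = 0;   // each set mask bit clears one 32-bit element
 * }
 * @endcode
 */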
8649
8650
8651#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
8652 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
8653
8654
8655/** Emits code for IEM_MC_FETCH_YREG_U256. */
8656DECL_INLINE_THROW(uint32_t)
8657iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
8658{
8659 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8660 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
8661
8662 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8663 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8664 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8665
8666 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
8667
8668 /* Free but don't flush the source register. */
8669 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8670 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8671
8672 return off;
8673}
8674
8675
8676#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
8677 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
8678
8679
8680/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
8681DECL_INLINE_THROW(uint32_t)
8682iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
8683{
8684 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8685 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8686
8687 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8688 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8689 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
8690
8691 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
8692
8693 /* Free but don't flush the destination register. */
8694 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8695 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8696
8697 return off;
8698}
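
/*
 * Taken together, the fetch/store pair above moves the full 256-bit value
 * between a guest YMM register and a 256-bit local variable; the store writes
 * all four qwords, which is what the _ZX_VLMAX name implies while 256 bits is
 * the maximum supported vector length.  Illustrative sketch only (the
 * iemSketch* helpers are not part of the recompiler):
 *
 * @code
 * static void iemSketchFetchYregU256(RTUINT256U *puDst, RTUINT256U const *puYRegSrc)
 * {
 *     *puDst = *puYRegSrc;     // full 256-bit copy, source left untouched
 * }
 *
 * static void iemSketchStoreYregU256ZxVlmax(RTUINT256U *puYRegDst, RTUINT256U const *puSrc)
 * {
 *     *puYRegDst = *puSrc;     // full 256-bit copy, overwriting bits 255:128 as well
 * }
 * @endcode
 */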
8699
8700
8701#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
8702 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
8703
8704
8705/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
8706DECL_INLINE_THROW(uint32_t)
8707iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
8708 uint8_t idxSrcVar, uint8_t iDwSrc)
8709{
8710 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8711 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8712
8713 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8714 iDwDst < 4
8715 ? kIemNativeGstSimdRegLdStSz_Low128
8716 : kIemNativeGstSimdRegLdStSz_High128,
8717 kIemNativeGstRegUse_ForUpdate);
8718 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
8719 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8720
8721 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
8722 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
8723
8724 /* Free but don't flush the destination register; also free the temporary GPR. */
8725 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8726 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8727 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8728
8729 return off;
8730}
8731
8732
8733#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
8734 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
8735
8736
8737/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
8738DECL_INLINE_THROW(uint32_t)
8739iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
8740 uint8_t idxSrcVar, uint8_t iQwSrc)
8741{
8742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8744
8745 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8746 iQwDst < 2
8747 ? kIemNativeGstSimdRegLdStSz_Low128
8748 : kIemNativeGstSimdRegLdStSz_High128,
8749 kIemNativeGstRegUse_ForUpdate);
8750 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
8751 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8752
8753 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
8754 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
8755
8756 /* Free but don't flush the destination register; also free the temporary GPR. */
8757 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8758 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8759 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8760
8761 return off;
8762}
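
/*
 * Both element-store emitters above copy a single element out of a 256-bit
 * local into the destination YMM register and leave every other element as it
 * was; note that only the 128-bit half containing the destination element is
 * loaded and marked for writing (iDwDst < 4, respectively iQwDst < 2, selects
 * the low half).  Illustrative sketch only, not part of the recompiler:
 *
 * @code
 * static void iemSketchStoreYregU32FromU256(RTUINT256U *puYRegDst, uint8_t iDwDst, RTUINT256U const *puSrc, uint8_t iDwSrc)
 * {
 *     puYRegDst->au32[iDwDst] = puSrc->au32[iDwSrc];  // all other dwords keep their current values
 * }
 *
 * static void iemSketchStoreYregU64FromU256(RTUINT256U *puYRegDst, uint8_t iQwDst, RTUINT256U const *puSrc, uint8_t iQwSrc)
 * {
 *     puYRegDst->au64[iQwDst] = puSrc->au64[iQwSrc];  // all other qwords keep their current values
 * }
 * @endcode
 */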
8763
8764
8765#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
8766 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
8767
8768
8769/** Emits code for IEM_MC_STORE_YREG_U64. */
8770DECL_INLINE_THROW(uint32_t)
8771iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
8772{
8773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8775
8776 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8777 iQwDst < 2
8778 ? kIemNativeGstSimdRegLdStSz_Low128
8779 : kIemNativeGstSimdRegLdStSz_High128,
8780 kIemNativeGstRegUse_ForUpdate);
8781
8782 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8783
8784 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
8785
8786 /* Free but don't flush the destination register. */
8787 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8788 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8789
8790 return off;
8791}
8792
8793
8794#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
8795 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
8796
8797/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
8798DECL_INLINE_THROW(uint32_t)
8799iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8800{
8801 RT_NOREF(pReNative, iYReg);
8802 /** @todo Needs to be implemented when support for AVX-512 is added. */
8803 return off;
8804}
8805
8806
8807
8808/*********************************************************************************************************************************
8809* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
8810*********************************************************************************************************************************/
8811
8812/**
8813 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
8814 */
8815DECL_INLINE_THROW(uint32_t)
8816iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8817{
8818 /* Grab the MXCSR register; it must not be call-volatile, or we would end up freeing it when setting up the call below. */
8819 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
8820 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8821 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8822
8823 /*
8824 * Need to do the FPU preparation.
8825 */
8826 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8827
8828 /*
8829 * Do all the call setup and cleanup.
8830 */
8831 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
8832
8833 /*
8834 * Load the MXCSR register into the first argument and mask out the current exception flags.
8835 */
8836 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
8837 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
8838
8839 /*
8840 * Make the call.
8841 */
8842 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8843
8844 /*
8845 * The updated MXCSR is in the return register.
8846 */
8847 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
8848
8849#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8850 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8851 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8852#endif
8853 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8854
8855 return off;
8856}
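
/*
 * At runtime the code generated by this worker wraps each SSE/AVX assembly
 * helper roughly as sketched below.  The sketch is illustrative only: the
 * FNSKETCHSSEWORKER shape (MXCSR in, updated MXCSR out, plus operand pointers)
 * mirrors what the register setup above implies, while the real worker
 * signatures vary with the instruction form:
 *
 * @code
 * typedef uint32_t FNSKETCHSSEWORKER(uint32_t fMxCsrIn, PX86XMMREG puDst, PCX86XMMREG puSrc);
 *
 * static uint32_t iemSketchCallSseAvxWorker(uint32_t fGuestMxCsr, FNSKETCHSSEWORKER *pfnWorker,
 *                                           PX86XMMREG puDst, PCX86XMMREG puSrc)
 * {
 *     uint32_t fMxCsr = fGuestMxCsr & ~X86_MXCSR_XCPT_FLAGS;  // hand the worker a copy without the sticky exception flags
 *     fMxCsr = pfnWorker(fMxCsr, puDst, puSrc);               // the worker returns the updated MXCSR
 *     return fMxCsr;                                          // written back to CPUMCTX (XState.x87.MXCSR) right away
 * }
 * @endcode
 */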
8857
8858
8859#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
8860 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8861
8862/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
8863DECL_INLINE_THROW(uint32_t)
8864iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8865{
8866 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8867 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8868 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8869}
8870
8871
8872#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8873 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8874
8875/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
8876DECL_INLINE_THROW(uint32_t)
8877iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8878{
8879 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8880 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8881 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8882 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8883}
8884
8885
8886/*********************************************************************************************************************************
8887* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
8888*********************************************************************************************************************************/
8889
8890#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
8891 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8892
8893/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
8894DECL_INLINE_THROW(uint32_t)
8895iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8896{
8897 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8898 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8899 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8900}
8901
8902
8903#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8904 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8905
8906/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
8907DECL_INLINE_THROW(uint32_t)
8908iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8909{
8910 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8911 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8912 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8913 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8914}
8915#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8916
8917
8918/*********************************************************************************************************************************
8919* Include instruction emitters. *
8920*********************************************************************************************************************************/
8921#include "target-x86/IEMAllN8veEmit-x86.h"
8922