VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h @ 104150

Last change on this file was r104150, checked in by vboxsync, 10 months ago:

VMM/IEM: Rework the [v][u]comis[sd] instruction emulations to work by value instead of by reference and drop the IEM_MC_FETCH_MEM_XMM_U32/IEM_MC_FETCH_XMM_U64 microcode statements, enabling recompilation of those instructions. bugref:10614

1/* $Id: IEMAllN8veRecompFuncs.h 104150 2024-04-04 08:54:42Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes the delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that
104 * use the guest state (like raising exceptions).
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
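/* Illustrative sketch (not part of the build): a decoder-generated MC block,
 * for instance something along the lines of
 *
 *     IEM_MC_BEGIN_EX(IEM_MC_F_64BIT, 0, 0);
 *     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(cbInstr, VINF_SUCCESS);
 *     IEM_MC_END();
 *
 * expands under these definitions into straight-line emitter calls that thread
 * the native code buffer offset 'off' through every statement and finally do
 * 'return off'.  The exact MC statements above are an assumed example only. */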
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
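/* Usage sketch (the emitter name iemNativeEmit_and_r_r_efl is hypothetical,
 * for illustration only):
 *
 *     IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64) {
 *         IEM_MC_NATIVE_EMIT_2(iemNativeEmit_and_r_r_efl, idxVarDst, idxVarSrc);
 *     } IEM_MC_NATIVE_ELSE() {
 *         IEM_MC_CALL_VOID_AIMPL_2(pfnAImpl, pu32Dst, u32Src);
 *     } IEM_MC_NATIVE_ENDIF();
 *
 * i.e. on hosts named in the IF mask the native emitter is invoked directly
 * (receiving pReNative/off plus the listed arguments), while other hosts fall
 * back to the C implementation in the ELSE branch. */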
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it will probably drop into the
310 * debugger, so it's not worth the effort of trying to service it here; we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * the immediate in the AND operation), we always update the flags and skip
315 * the conditional jump associated with the extra check.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the
327 * ReturnWithFlags label, any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
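/* Roughly the guest-visible logic the emitter above produces (sketch only):
 *
 *     uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *     if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *         goto ReturnWithFlags;          // leave the TB and let the execution loop deal with it
 *     pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 */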
346
347/** Handles the a_rcNormal status; a no-op for the VINF_SUCCESS dummy case. */
348template<int const a_rcNormal>
349DECL_FORCE_INLINE(uint32_t)
350iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
351{
352 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
353 if (a_rcNormal != VINF_SUCCESS)
354 {
355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
357#else
358 RT_NOREF_PV(idxInstr);
359#endif
360
361 /* As this code returns from the TB, any pending register writes must be flushed. */
362 off = iemNativeRegFlushPendingWrites(pReNative, off);
363
364 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
365 }
366 return off;
367}
368
369
370#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
371 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
373
374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
375 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
377 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
378
379/** Same as iemRegAddToRip64AndFinishingNoFlags. */
380DECL_INLINE_THROW(uint32_t)
381iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
382{
383#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
384# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
385 if (!pReNative->Core.offPc)
386 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
387# endif
388
389 /* Allocate a temporary PC register. */
390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
391
392 /* Perform the addition and store the result. */
393 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
394 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395
396 /* Free but don't flush the PC register. */
397 iemNativeRegFreeTmp(pReNative, idxPcReg);
398#endif
399
400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
401 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
402
403 pReNative->Core.offPc += cbInstr;
404# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
405 off = iemNativePcAdjustCheck(pReNative, off);
406# endif
407 if (pReNative->cCondDepth)
408 off = iemNativeEmitPcWriteback(pReNative, off);
409 else
410 pReNative->Core.cInstrPcUpdateSkipped++;
411#endif
412
413 return off;
414}
415
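/* Interpreter-level equivalent of the emitter above (sketch only):
 *
 *     pVCpu->cpum.GstCtx.rip = pVCpu->cpum.GstCtx.rip + cbInstr;
 *
 * With IEMNATIVE_WITH_DELAYED_PC_UPDATING the addition is merely accumulated in
 * pReNative->Core.offPc and only written back when actually needed (e.g. inside
 * conditional blocks), roughly saving a load/add/store per instruction. */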
416
417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
418 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
419 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
420
421#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
422 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
424 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
425
426/** Same as iemRegAddToEip32AndFinishingNoFlags. */
427DECL_INLINE_THROW(uint32_t)
428iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
429{
430#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
431# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
432 if (!pReNative->Core.offPc)
433 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
434# endif
435
436 /* Allocate a temporary PC register. */
437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
438
439 /* Perform the addition and store the result. */
440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
442
443 /* Free but don't flush the PC register. */
444 iemNativeRegFreeTmp(pReNative, idxPcReg);
445#endif
446
447#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
448 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
449
450 pReNative->Core.offPc += cbInstr;
451# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
452 off = iemNativePcAdjustCheck(pReNative, off);
453# endif
454 if (pReNative->cCondDepth)
455 off = iemNativeEmitPcWriteback(pReNative, off);
456 else
457 pReNative->Core.cInstrPcUpdateSkipped++;
458#endif
459
460 return off;
461}
462
463
464#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
465 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
466 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
467
468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
469 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
470 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
471 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
472
473/** Same as iemRegAddToIp16AndFinishingNoFlags. */
474DECL_INLINE_THROW(uint32_t)
475iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
476{
477#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
478# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
479 if (!pReNative->Core.offPc)
480 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
481# endif
482
483 /* Allocate a temporary PC register. */
484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
485
486 /* Perform the addition and store the result. */
487 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
488 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
489 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
490
491 /* Free but don't flush the PC register. */
492 iemNativeRegFreeTmp(pReNative, idxPcReg);
493#endif
494
495#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
496 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
497
498 pReNative->Core.offPc += cbInstr;
499# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
500 off = iemNativePcAdjustCheck(pReNative, off);
501# endif
502 if (pReNative->cCondDepth)
503 off = iemNativeEmitPcWriteback(pReNative, off);
504 else
505 pReNative->Core.cInstrPcUpdateSkipped++;
506#endif
507
508 return off;
509}
510
511
512
513/*********************************************************************************************************************************
514* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
515*********************************************************************************************************************************/
516
517#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
518 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
519 (a_enmEffOpSize), pCallEntry->idxInstr); \
520 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
521
522#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
523 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
524 (a_enmEffOpSize), pCallEntry->idxInstr); \
525 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
526 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
527
528#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
529 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
530 IEMMODE_16BIT, pCallEntry->idxInstr); \
531 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
532
533#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
534 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
535 IEMMODE_16BIT, pCallEntry->idxInstr); \
536 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
537 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
538
539#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
540 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
541 IEMMODE_64BIT, pCallEntry->idxInstr); \
542 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
543
544#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
545 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
546 IEMMODE_64BIT, pCallEntry->idxInstr); \
547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
548 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
549
550/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
551 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
552 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
553DECL_INLINE_THROW(uint32_t)
554iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
555 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
556{
557 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
558
559 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
560 off = iemNativeRegFlushPendingWrites(pReNative, off);
561
562#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
563 Assert(pReNative->Core.offPc == 0);
564
565 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
566#endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition. */
572 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
573
574 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
575 {
576 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
578 }
579 else
580 {
581 /* Just truncate the result to 16-bit IP. */
582 Assert(enmEffOpSize == IEMMODE_16BIT);
583 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
584 }
585 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
586
587 /* Free but don't flush the PC register. */
588 iemNativeRegFreeTmp(pReNative, idxPcReg);
589
590 return off;
591}
592
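/* Guest-level equivalent (sketch only), cf. iemRegRip64RelativeJumpSxxAndFinishNoFlags:
 *
 *     uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
 *     if (enmEffOpSize == IEMMODE_16BIT)
 *         uNewRip &= UINT16_MAX;                      // truncate to the 16-bit IP
 *     else if (!IEM_IS_CANONICAL(uNewRip))
 *         return iemRaiseGeneralProtectionFault0(pVCpu);
 *     pVCpu->cpum.GstCtx.rip = uNewRip;
 */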
593
594#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
595 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
596 (a_enmEffOpSize), pCallEntry->idxInstr); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
598
599#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
600 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
601 (a_enmEffOpSize), pCallEntry->idxInstr); \
602 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
603 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
604
605#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
606 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
607 IEMMODE_16BIT, pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
609
610#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
611 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
612 IEMMODE_16BIT, pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
615
616#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
618 IEMMODE_32BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
620
621#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
623 IEMMODE_32BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
626
627/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
628 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
629 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
630DECL_INLINE_THROW(uint32_t)
631iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
632 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
633{
634 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
635
636 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
637 off = iemNativeRegFlushPendingWrites(pReNative, off);
638
639#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
640 Assert(pReNative->Core.offPc == 0);
641
642 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
643#endif
644
645 /* Allocate a temporary PC register. */
646 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
647
648 /* Perform the addition. */
649 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
650
651 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
652 if (enmEffOpSize == IEMMODE_16BIT)
653 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
654
655 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
656/** @todo we can skip this in 32-bit FLAT mode. */
657 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
658
659 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
660
661 /* Free but don't flush the PC register. */
662 iemNativeRegFreeTmp(pReNative, idxPcReg);
663
664 return off;
665}
666
667
668#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
669 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
671
672#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
673 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
674 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
675 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
676
677#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
678 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
680
681#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
682 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
683 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
684 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
685
686#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
687 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
689
690#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
691 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
692 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
693 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
694
695/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
696DECL_INLINE_THROW(uint32_t)
697iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
698 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
699{
700 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
701 off = iemNativeRegFlushPendingWrites(pReNative, off);
702
703#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
704 Assert(pReNative->Core.offPc == 0);
705
706 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
707#endif
708
709 /* Allocate a temporary PC register. */
710 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
711
712 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
713 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
714 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
715 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
716 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
717
718 /* Free but don't flush the PC register. */
719 iemNativeRegFreeTmp(pReNative, idxPcReg);
720
721 return off;
722}
723
724
725
726/*********************************************************************************************************************************
727* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
728*********************************************************************************************************************************/
729
730/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
731#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
732 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
733
734/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
735#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
736 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
737
738/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
739#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
740 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
741
742/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
743 * clears flags. */
744#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
745 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
746 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
747
748/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
749 * clears flags. */
750#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
751 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
752 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
753
754/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
755 * clears flags. */
756#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
757 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
758 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
759
760#undef IEM_MC_SET_RIP_U16_AND_FINISH
761
762
763/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
764#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
765 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
766
767/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
768#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
769 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
770
771/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
772 * clears flags. */
773#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
774 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
775 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
776
777/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
778 * and clears flags. */
779#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
780 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
781 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
782
783#undef IEM_MC_SET_RIP_U32_AND_FINISH
784
785
786/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
787#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
788 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
789
790/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
791 * and clears flags. */
792#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
793 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
794 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
795
796#undef IEM_MC_SET_RIP_U64_AND_FINISH
797
798
799/** Same as iemRegRipJumpU16AndFinishNoFlags,
800 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
801DECL_INLINE_THROW(uint32_t)
802iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
803 uint8_t idxInstr, uint8_t cbVar)
804{
805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
807
808 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
809 off = iemNativeRegFlushPendingWrites(pReNative, off);
810
811#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
812 Assert(pReNative->Core.offPc == 0);
813
814 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
815#endif
816
817 /* Get a register with the new PC loaded from idxVarPc.
818 Note! This ASSUMES that the high bits of the GPR are zeroed. */
819 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
820
821 /* Check limit (may #GP(0) + exit TB). */
822 if (!f64Bit)
823/** @todo we can skip this test in FLAT 32-bit mode. */
824 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
825 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
826 else if (cbVar > sizeof(uint32_t))
827 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
828
829 /* Store the result. */
830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
831
832 iemNativeVarRegisterRelease(pReNative, idxVarPc);
833 /** @todo implicitly free the variable? */
834
835 return off;
836}
837
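/* Guest-level equivalent (sketch only), cf. iemRegRipJumpUxxAndFinishNoFlags:
 *
 *     if (!f64Bit && uNewPc > pVCpu->cpum.GstCtx.cs.u32Limit)
 *         return iemRaiseGeneralProtectionFault0(pVCpu);     // CS limit check
 *     if (f64Bit && !IEM_IS_CANONICAL(uNewPc))
 *         return iemRaiseGeneralProtectionFault0(pVCpu);     // canonical check
 *     pVCpu->cpum.GstCtx.rip = uNewPc;
 */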
838
839
840/*********************************************************************************************************************************
841* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
842*********************************************************************************************************************************/
843
844#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
845 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
846
847/**
848 * Emits code to check if a \#NM exception should be raised.
849 *
850 * @returns New code buffer offset, UINT32_MAX on failure.
851 * @param pReNative The native recompile state.
852 * @param off The code buffer offset.
853 * @param idxInstr The current instruction.
854 */
855DECL_INLINE_THROW(uint32_t)
856iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
857{
858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
859 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
860
861 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
862 {
863#endif
864 /*
865 * Make sure we don't have any outstanding guest register writes as we may
866 * raise an #NM and all guest registers must be up to date in CPUMCTX.
867 */
868 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
869 off = iemNativeRegFlushPendingWrites(pReNative, off);
870
871#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
872 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
873#else
874 RT_NOREF(idxInstr);
875#endif
876
877 /* Allocate a temporary CR0 register. */
878 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
879 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
880
881 /*
882 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
883 * return raisexcpt();
884 */
885 /* Test and jump. */
886 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
887
888 /* Free but don't flush the CR0 register. */
889 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
890
891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
893 }
894 else
895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
896#endif
897
898 return off;
899}
900
901
902#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
903 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
904
905/**
906 * Emits code to check if a \#MF exception should be raised.
907 *
908 * @returns New code buffer offset, UINT32_MAX on failure.
909 * @param pReNative The native recompile state.
910 * @param off The code buffer offset.
911 * @param idxInstr The current instruction.
912 */
913DECL_INLINE_THROW(uint32_t)
914iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
915{
916 /*
917 * Make sure we don't have any outstanding guest register writes as we may
918 * raise an #MF and all guest registers must be up to date in CPUMCTX.
919 */
920 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
921 off = iemNativeRegFlushPendingWrites(pReNative, off);
922
923#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
924 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
925#else
926 RT_NOREF(idxInstr);
927#endif
928
929 /* Allocate a temporary FSW register. */
930 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
931 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
932
933 /*
934 * if ((FSW & X86_FSW_ES) != 0)
935 * return raisexcpt();
936 */
937 /* Test and jump. */
938 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
939
940 /* Free but don't flush the FSW register. */
941 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
942
943 return off;
944}
945
946
947#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
948 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
949
950/**
951 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
952 *
953 * @returns New code buffer offset, UINT32_MAX on failure.
954 * @param pReNative The native recompile state.
955 * @param off The code buffer offset.
956 * @param idxInstr The current instruction.
957 */
958DECL_INLINE_THROW(uint32_t)
959iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
960{
961#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
962 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
963
964 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
965 {
966#endif
967 /*
968 * Make sure we don't have any outstanding guest register writes as we may
969 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
970 */
971 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
972 off = iemNativeRegFlushPendingWrites(pReNative, off);
973
974#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
975 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
976#else
977 RT_NOREF(idxInstr);
978#endif
979
980 /* Allocate a temporary CR0 and CR4 register. */
981 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
982 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
983 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
984 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
985
986 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
987#ifdef RT_ARCH_AMD64
988 /*
989 * We do a modified test here:
990 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
991 * else { goto RaiseSseRelated; }
992 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
993 * all targets except the 386, and since the 386 doesn't support SSE,
994 * this should be a safe assumption.
995 */
996 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
997 //pCodeBuf[off++] = 0xcc;
998 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
999 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
1000 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
1001 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
1002 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
1003 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
1004
1005#elif defined(RT_ARCH_ARM64)
1006 /*
1007 * We do a modified test here:
1008 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
1009 * else { goto RaiseSseRelated; }
1010 */
1011 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
1012 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1013 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
1014 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
1015 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
1016 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1017 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1018 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
1019 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1020 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1021 idxLabelRaiseSseRelated);
1022
1023#else
1024# error "Port me!"
1025#endif
1026
1027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1028 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1029 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1030 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1031
1032#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1033 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1034 }
1035 else
1036 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1037#endif
1038
1039 return off;
1040}
1041
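/* Plain-C form of the folded test above (sketch only):
 *
 *     if (   (pVCpu->cpum.GstCtx.cr0 & (X86_CR0_EM | X86_CR0_TS))
 *         || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
 *         goto RaiseSseRelated;              // decides between #UD and #NM there
 */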
1042
1043#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1044 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1045
1046/**
1047 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1048 *
1049 * @returns New code buffer offset, UINT32_MAX on failure.
1050 * @param pReNative The native recompile state.
1051 * @param off The code buffer offset.
1052 * @param idxInstr The current instruction.
1053 */
1054DECL_INLINE_THROW(uint32_t)
1055iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1056{
1057#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1058 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1059
1060 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1061 {
1062#endif
1063 /*
1064 * Make sure we don't have any outstanding guest register writes as we may
1065 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1066 */
1067 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1068 off = iemNativeRegFlushPendingWrites(pReNative, off);
1069
1070#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1071 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1072#else
1073 RT_NOREF(idxInstr);
1074#endif
1075
1076 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1077 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1078 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1079 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1080 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1081 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1082
1083 /*
1084 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1085 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1086 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1087 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1088 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1089 * { likely }
1090 * else { goto RaiseAvxRelated; }
1091 */
1092#ifdef RT_ARCH_AMD64
1093 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1094 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1095 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1096 ^ 0x1a) ) { likely }
1097 else { goto RaiseAvxRelated; } */
1098 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1099 //pCodeBuf[off++] = 0xcc;
1100 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1101 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1102 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1103 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1104 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1105 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1106 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1107 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1108 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1109 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1110 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1111
1112#elif defined(RT_ARCH_ARM64)
1113 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1114 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1115 else { goto RaiseAvxRelated; } */
1116 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1117 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1118 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1119 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1120 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1121 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1122 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1123 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1124 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1125 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1126 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1127 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1128 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1129 idxLabelRaiseAvxRelated);
1130
1131#else
1132# error "Port me!"
1133#endif
1134
1135 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1136 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1137 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1138 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1139#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1140 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1141 }
1142 else
1143 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1144#endif
1145
1146 return off;
1147}
1148
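/* Plain-C form of the folded AVX test above (sketch only):
 *
 *     if (      (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
 *            != (XSAVE_C_YMM | XSAVE_C_SSE)
 *         || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
 *         ||  (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
 *         goto RaiseAvxRelated;              // decides between #UD and #NM there
 */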
1149
1150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1151#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1152 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
1153
1154/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1155DECL_INLINE_THROW(uint32_t)
1156iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1157{
1158 /*
1159 * Make sure we don't have any outstanding guest register writes as we may
1160 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1161 */
1162 off = iemNativeRegFlushPendingWrites(pReNative, off);
1163
1164#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1165 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1166#else
1167 RT_NOREF(idxInstr);
1168#endif
1169
1170 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1171 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1172 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1173
1174 /* mov tmp, varmxcsr */
1175 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1176 /* tmp &= X86_MXCSR_XCPT_MASK */
1177 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1178 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1179 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1180 /* tmp = ~tmp */
1181 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1182 /* tmp &= mxcsr */
1183 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1184 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1185 idxLabelRaiseSseAvxFpRelated);
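    /* In plain C the sequence above tests (roughly):
           ((mxcsr & X86_MXCSR_XCPT_FLAGS) & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)) != 0
       i.e. it branches to the RaiseSseAvxFpRelated code when any exception status flag is
       set whose corresponding mask bit is clear. */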
1186
1187 /* Free but don't flush the MXCSR register. */
1188 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1189 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1190
1191 return off;
1192}
1193#endif
1194
1195
1196#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1197 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1198
1199/**
1200 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
1201 *
1202 * @returns New code buffer offset, UINT32_MAX on failure.
1203 * @param pReNative The native recompile state.
1204 * @param off The code buffer offset.
1205 * @param idxInstr The current instruction.
1206 */
1207DECL_INLINE_THROW(uint32_t)
1208iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1209{
1210 /*
1211 * Make sure we don't have any outstanding guest register writes as we may
1212 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1213 */
1214 off = iemNativeRegFlushPendingWrites(pReNative, off);
1215
1216#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1217 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1218#else
1219 RT_NOREF(idxInstr);
1220#endif
1221
1222 /* Allocate a temporary CR4 register. */
1223 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1224 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1225 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1226
1227 /*
1228 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1229 * return raisexcpt();
1230 */
1231 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
1232
1233 /* raise \#UD exception unconditionally. */
1234 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
1235
1236 /* Free but don't flush the CR4 register. */
1237 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1238
1239 return off;
1240}
1241
1242
1243#define IEM_MC_RAISE_DIVIDE_ERROR() \
1244 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1245
1246/**
1247 * Emits code to raise a \#DE.
1248 *
1249 * @returns New code buffer offset, UINT32_MAX on failure.
1250 * @param pReNative The native recompile state.
1251 * @param off The code buffer offset.
1252 * @param idxInstr The current instruction.
1253 */
1254DECL_INLINE_THROW(uint32_t)
1255iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1256{
1257 /*
1258 * Make sure we don't have any outstanding guest register writes as we may
1259 * raise a \#DE and all guest registers must be up to date in CPUMCTX. */
1260 off = iemNativeRegFlushPendingWrites(pReNative, off);
1261
1262#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1263 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1264#else
1265 RT_NOREF(idxInstr);
1266#endif
1267
1268 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1269
1270 /* raise \#DE exception unconditionally. */
1271 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1272
1273 return off;
1274}
1275
1276
1277#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
1278 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
1279
1280/**
1281 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
1282 *
1283 * @returns New code buffer offset, UINT32_MAX on failure.
1284 * @param pReNative The native recompile state.
1285 * @param off The code buffer offset.
1286 * @param idxInstr The current instruction.
1287 */
1288DECL_INLINE_THROW(uint32_t)
1289iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
1290{
1291 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
1292 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
1293
1294 /*
1295 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
1296 */
1297 off = iemNativeRegFlushPendingWrites(pReNative, off);
1298
1299#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1300 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1301#else
1302 RT_NOREF(idxInstr);
1303#endif
1304
1305 uint8_t const idxLabelRaiseGp0 = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0);
1306 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
1307
1308 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxVarReg, cbAlign - 1, idxLabelRaiseGp0);
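    /* I.e. raise \#GP(0) when (EffAddr & (cbAlign - 1)) != 0; this naturally assumes that
       cbAlign is a power of two. */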
1309
1310 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
1311 return off;
1312}
1313
1314
1315/*********************************************************************************************************************************
1316* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1317*********************************************************************************************************************************/
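/*
 * Note: in the MC blocks these emitters serve, a conditional is always written as a matched
 * IEM_MC_IF_XXX / IEM_MC_ENDIF() pair, optionally with an IEM_MC_ELSE() in between, roughly
 * like this (illustrative only, not lifted from any particular instruction):
 *     IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *         ...
 *     } IEM_MC_ELSE() {
 *         ...
 *     } IEM_MC_ENDIF();
 * IEM_MC_IF_XXX pushes an entry onto the condition stack below, IEM_MC_ELSE() snapshots the
 * register/variable state and restores the pre-if state, and IEM_MC_ENDIF() reconciles the
 * two paths and pops the entry again.
 */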
1318
1319/**
1320 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1321 *
1322 * @returns Pointer to the condition stack entry on success, NULL on failure
1323 * (too many nestings)
1324 */
1325DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1326{
1327#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1328 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1329#endif
1330
1331 uint32_t const idxStack = pReNative->cCondDepth;
1332 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1333
1334 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1335 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1336
1337 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1338 pEntry->fInElse = false;
1339 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1340 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1341
1342 return pEntry;
1343}
1344
1345
1346/**
1347 * Start of the if-block, snapshotting the register and variable state.
1348 */
1349DECL_INLINE_THROW(void)
1350iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1351{
1352 Assert(offIfBlock != UINT32_MAX);
1353 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1354 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1355 Assert(!pEntry->fInElse);
1356
1357 /* Define the start of the IF block if requested or for disassembly purposes. */
1358 if (idxLabelIf != UINT32_MAX)
1359 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1360#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1361 else
1362 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1363#else
1364 RT_NOREF(offIfBlock);
1365#endif
1366
1367#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1368 Assert(pReNative->Core.offPc == 0);
1369#endif
1370
1371 /* Copy the initial state so we can restore it in the 'else' block. */
1372 pEntry->InitialState = pReNative->Core;
1373}
1374
1375
1376#define IEM_MC_ELSE() } while (0); \
1377 off = iemNativeEmitElse(pReNative, off); \
1378 do {
1379
1380/** Emits code related to IEM_MC_ELSE. */
1381DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1382{
1383 /* Check sanity and get the conditional stack entry. */
1384 Assert(off != UINT32_MAX);
1385 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1386 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1387 Assert(!pEntry->fInElse);
1388
1389#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1390 /* Writeback any dirty shadow registers. */
1391 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1392 * in one of the branches and leave guest registers already dirty before the start of the if
1393 * block alone. */
1394 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1395#endif
1396
1397 /* Jump to the endif */
1398 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1399
1400 /* Define the else label and enter the else part of the condition. */
1401 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1402 pEntry->fInElse = true;
1403
1404#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1405 Assert(pReNative->Core.offPc == 0);
1406#endif
1407
1408 /* Snapshot the core state so we can do a merge at the endif and restore
1409 the snapshot we took at the start of the if-block. */
1410 pEntry->IfFinalState = pReNative->Core;
1411 pReNative->Core = pEntry->InitialState;
1412
1413 return off;
1414}
1415
1416
1417#define IEM_MC_ENDIF() } while (0); \
1418 off = iemNativeEmitEndIf(pReNative, off)
1419
1420/** Emits code related to IEM_MC_ENDIF. */
1421DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1422{
1423 /* Check sanity and get the conditional stack entry. */
1424 Assert(off != UINT32_MAX);
1425 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1426 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1427
1428#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1429 Assert(pReNative->Core.offPc == 0);
1430#endif
1431#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1432 /* Writeback any dirty shadow registers (else branch). */
1433 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1434 * in one of the branches and leave guest registers already dirty before the start of the if
1435 * block alone. */
1436 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1437#endif
1438
1439 /*
1440 * Now we have to find common ground with the other core state (the if-final
1441 * or the initial one). Use the smallest common denominator and just drop
1442 * anything that isn't the same in both states.
1443 */
1444 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1445 * which is why we're doing this at the end of the else-block.
1446 * But we'd need more info about future for that to be worth the effort. */
1447 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1448#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1449 Assert( pOther->bmGstRegShadowDirty == 0
1450 && pReNative->Core.bmGstRegShadowDirty == 0);
1451#endif
1452
1453 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1454 {
1455 /* shadow guest stuff first. */
1456 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1457 if (fGstRegs)
1458 {
1459 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1460 do
1461 {
1462 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1463 fGstRegs &= ~RT_BIT_64(idxGstReg);
1464
1465 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1466 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1467 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1468 {
1469 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1470 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1471
1472#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1473 /* Writeback any dirty shadow registers we are about to unshadow. */
1474 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1475#endif
1476 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1477 }
1478 } while (fGstRegs);
1479 }
1480 else
1481 {
1482 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1483#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1484 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1485#endif
1486 }
1487
1488 /* Check variables next. For now we must require them to be identical
1489 or stuff we can recreate. */
1490 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1491 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1492 if (fVars)
1493 {
1494 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1495 do
1496 {
1497 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1498 fVars &= ~RT_BIT_32(idxVar);
1499
1500 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1501 {
1502 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1503 continue;
1504 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1505 {
1506 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1507 if (idxHstReg != UINT8_MAX)
1508 {
1509 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1510 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1511 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1512 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1513 }
1514 continue;
1515 }
1516 }
1517 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1518 continue;
1519
1520 /* Irreconcilable, so drop it. */
1521 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1522 if (idxHstReg != UINT8_MAX)
1523 {
1524 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1525 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1526 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1527 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1528 }
1529 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1530 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1531 } while (fVars);
1532 }
1533
1534 /* Finally, check that the host register allocations match. */
1535 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1536 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1537 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1538 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1539 }
1540
1541 /*
1542 * Define the endif label and maybe the else one if we're still in the 'if' part.
1543 */
1544 if (!pEntry->fInElse)
1545 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1546 else
1547 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1548 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1549
1550 /* Pop the conditional stack.*/
1551 pReNative->cCondDepth -= 1;
1552
1553 return off;
1554}
1555
1556
1557#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1558 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1559 do {
1560
1561/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1562DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1563{
1564 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1565 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1566
1567 /* Get the eflags. */
1568 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1569 kIemNativeGstRegUse_ReadOnly);
1570
1571 /* Test and jump. */
1572 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1573
1574 /* Free but don't flush the EFlags register. */
1575 iemNativeRegFreeTmp(pReNative, idxEflReg);
1576
1577 /* Make a copy of the core state now as we start the if-block. */
1578 iemNativeCondStartIfBlock(pReNative, off);
1579
1580 return off;
1581}
1582
1583
1584#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1585 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1586 do {
1587
1588/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1589DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1590{
1591 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1592 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1593
1594 /* Get the eflags. */
1595 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1596 kIemNativeGstRegUse_ReadOnly);
1597
1598 /* Test and jump. */
1599 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1600
1601 /* Free but don't flush the EFlags register. */
1602 iemNativeRegFreeTmp(pReNative, idxEflReg);
1603
1604 /* Make a copy of the core state now as we start the if-block. */
1605 iemNativeCondStartIfBlock(pReNative, off);
1606
1607 return off;
1608}
1609
1610
1611#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1612 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1613 do {
1614
1615/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1616DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1617{
1618 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1619 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1620
1621 /* Get the eflags. */
1622 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1623 kIemNativeGstRegUse_ReadOnly);
1624
1625 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1626 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1627
1628 /* Test and jump. */
1629 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1630
1631 /* Free but don't flush the EFlags register. */
1632 iemNativeRegFreeTmp(pReNative, idxEflReg);
1633
1634 /* Make a copy of the core state now as we start the if-block. */
1635 iemNativeCondStartIfBlock(pReNative, off);
1636
1637 return off;
1638}
1639
1640
1641#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1642 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1643 do {
1644
1645/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1646DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1647{
1648 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1649 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1650
1651 /* Get the eflags. */
1652 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1653 kIemNativeGstRegUse_ReadOnly);
1654
1655 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1656 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1657
1658 /* Test and jump. */
1659 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1660
1661 /* Free but don't flush the EFlags register. */
1662 iemNativeRegFreeTmp(pReNative, idxEflReg);
1663
1664 /* Make a copy of the core state now as we start the if-block. */
1665 iemNativeCondStartIfBlock(pReNative, off);
1666
1667 return off;
1668}
1669
1670
1671#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1672 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1673 do {
1674
1675#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1676 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1677 do {
1678
1679/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1680DECL_INLINE_THROW(uint32_t)
1681iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1682 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1683{
1684 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1685 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1686
1687 /* Get the eflags. */
1688 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1689 kIemNativeGstRegUse_ReadOnly);
1690
1691 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1692 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1693
1694 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1695 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1696 Assert(iBitNo1 != iBitNo2);
1697
1698#ifdef RT_ARCH_AMD64
1699 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1700
1701 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1702 if (iBitNo1 > iBitNo2)
1703 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1704 else
1705 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1706 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1707
1708#elif defined(RT_ARCH_ARM64)
1709 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1710 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1711
1712 /* and tmpreg, eflreg, #1<<iBitNo1 */
1713 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1714
1715 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1716 if (iBitNo1 > iBitNo2)
1717 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1718 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1719 else
1720 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1721 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1722
1723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1724
1725#else
1726# error "Port me"
1727#endif
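    /* The trick above, in scalar terms: isolate EFLAGS bit #1, shift it to the position of
       bit #2 and XOR it with EFLAGS; bit #2 of the result is then set exactly when the two
       flags differ, which is what the test below branches on. */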
1728
1729 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1730 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1731 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1732
1733 /* Free but don't flush the EFlags and tmp registers. */
1734 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1735 iemNativeRegFreeTmp(pReNative, idxEflReg);
1736
1737 /* Make a copy of the core state now as we start the if-block. */
1738 iemNativeCondStartIfBlock(pReNative, off);
1739
1740 return off;
1741}
1742
1743
1744#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1745 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1746 do {
1747
1748#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1749 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1750 do {
1751
1752/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1753 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1754DECL_INLINE_THROW(uint32_t)
1755iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1756 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1757{
1758 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1759 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1760
1761 /* We need an if-block label for the inverted variant. */
1762 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1763 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1764
1765 /* Get the eflags. */
1766 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1767 kIemNativeGstRegUse_ReadOnly);
1768
1769 /* Translate the flag masks to bit numbers. */
1770 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1771 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1772
1773 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1774 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1775 Assert(iBitNo1 != iBitNo);
1776
1777 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1778 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1779 Assert(iBitNo2 != iBitNo);
1780 Assert(iBitNo2 != iBitNo1);
1781
1782#ifdef RT_ARCH_AMD64
1783 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1784#elif defined(RT_ARCH_ARM64)
1785 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1786#endif
1787
1788 /* Check for the lone bit first. */
1789 if (!fInverted)
1790 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1791 else
1792 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1793
1794 /* Then extract and compare the other two bits. */
1795#ifdef RT_ARCH_AMD64
1796 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1797 if (iBitNo1 > iBitNo2)
1798 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1799 else
1800 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1801 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1802
1803#elif defined(RT_ARCH_ARM64)
1804 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1805
1806 /* and tmpreg, eflreg, #1<<iBitNo1 */
1807 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1808
1809 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1810 if (iBitNo1 > iBitNo2)
1811 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1812 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1813 else
1814 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1815 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1816
1817 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1818
1819#else
1820# error "Port me"
1821#endif
1822
1823 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1824 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1825 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1826
1827 /* Free but don't flush the EFlags and tmp registers. */
1828 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1829 iemNativeRegFreeTmp(pReNative, idxEflReg);
1830
1831 /* Make a copy of the core state now as we start the if-block. */
1832 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1833
1834 return off;
1835}
1836
1837
1838#define IEM_MC_IF_CX_IS_NZ() \
1839 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1840 do {
1841
1842/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1843DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1844{
1845 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1846
1847 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1848 kIemNativeGstRegUse_ReadOnly);
1849 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
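    /* Note: the UINT16_MAX mask restricts the test to the low 16 bits, i.e. CX, even though
       the full RCX shadow copy was allocated above. */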
1850 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1851
1852 iemNativeCondStartIfBlock(pReNative, off);
1853 return off;
1854}
1855
1856
1857#define IEM_MC_IF_ECX_IS_NZ() \
1858 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1859 do {
1860
1861#define IEM_MC_IF_RCX_IS_NZ() \
1862 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1863 do {
1864
1865/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1866DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1867{
1868 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1869
1870 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1871 kIemNativeGstRegUse_ReadOnly);
1872 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1873 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1874
1875 iemNativeCondStartIfBlock(pReNative, off);
1876 return off;
1877}
1878
1879
1880#define IEM_MC_IF_CX_IS_NOT_ONE() \
1881 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1882 do {
1883
1884/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1885DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1886{
1887 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1888
1889 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1890 kIemNativeGstRegUse_ReadOnly);
1891#ifdef RT_ARCH_AMD64
1892 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1893#else
1894 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1895 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1896 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1897#endif
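    /* The temporary register is only needed by the ARM64 variant of the 16-bit compare
       helper (presumably to extract the low word before comparing); AMD64 can compare
       CX against the immediate directly. */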
1898 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1899
1900 iemNativeCondStartIfBlock(pReNative, off);
1901 return off;
1902}
1903
1904
1905#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1906 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1907 do {
1908
1909#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1910 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1911 do {
1912
1913/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1914DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1915{
1916 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1917
1918 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1919 kIemNativeGstRegUse_ReadOnly);
1920 if (f64Bit)
1921 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1922 else
1923 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1924 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1925
1926 iemNativeCondStartIfBlock(pReNative, off);
1927 return off;
1928}
1929
1930
1931#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1932 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1933 do {
1934
1935#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1936 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1937 do {
1938
1939/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1940 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1941DECL_INLINE_THROW(uint32_t)
1942iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1943{
1944 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1945 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1946
1947 /* We have to load both RCX and EFLAGS before we can start branching,
1948 otherwise we'll end up in the else-block with an inconsistent
1949 register allocator state.
1950 Doing EFLAGS first as it's more likely to be loaded, right? */
1951 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1952 kIemNativeGstRegUse_ReadOnly);
1953 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1954 kIemNativeGstRegUse_ReadOnly);
1955
1956 /** @todo we could reduce this to a single branch instruction by spending a
1957 * temporary register and some setnz stuff. Not sure if loops are
1958 * worth it. */
1959 /* Check CX. */
1960#ifdef RT_ARCH_AMD64
1961 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1962#else
1963 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1964 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1965 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1966#endif
1967
1968 /* Check the EFlags bit. */
1969 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1970 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1971 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1972 !fCheckIfSet /*fJmpIfSet*/);
1973
1974 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1975 iemNativeRegFreeTmp(pReNative, idxEflReg);
1976
1977 iemNativeCondStartIfBlock(pReNative, off);
1978 return off;
1979}
1980
1981
1982#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1983 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1984 do {
1985
1986#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1987 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1988 do {
1989
1990#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1991 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1992 do {
1993
1994#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1995 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1996 do {
1997
1998/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1999 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
2000 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
2001 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2002DECL_INLINE_THROW(uint32_t)
2003iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2004 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
2005{
2006 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2007 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2008
2009 /* We have to load both RCX and EFLAGS before we can start branching,
2010 otherwise we'll end up in the else-block with an inconsistent
2011 register allocator state.
2012 Doing EFLAGS first as it's more likely to be loaded, right? */
2013 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2014 kIemNativeGstRegUse_ReadOnly);
2015 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2016 kIemNativeGstRegUse_ReadOnly);
2017
2018 /** @todo we could reduce this to a single branch instruction by spending a
2019 * temporary register and some setnz stuff. Not sure if loops are
2020 * worth it. */
2021 /* Check RCX/ECX. */
2022 if (f64Bit)
2023 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2024 else
2025 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2026
2027 /* Check the EFlags bit. */
2028 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2029 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2030 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
2031 !fCheckIfSet /*fJmpIfSet*/);
2032
2033 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2034 iemNativeRegFreeTmp(pReNative, idxEflReg);
2035
2036 iemNativeCondStartIfBlock(pReNative, off);
2037 return off;
2038}
2039
2040
2041#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
2042 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
2043 do {
2044
2045/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
2046DECL_INLINE_THROW(uint32_t)
2047iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
2048{
2049 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2050
2051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
2052 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
2053 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2054 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2055
2056 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
2057
2058 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
2059
2060 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
2061
2062 iemNativeCondStartIfBlock(pReNative, off);
2063 return off;
2064}
2065
2066
2067#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
2068 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
2069 do {
2070
2071/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
2072DECL_INLINE_THROW(uint32_t)
2073iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
2074{
2075 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2076 Assert(iGReg < 16);
2077
2078 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2079 kIemNativeGstRegUse_ReadOnly);
2080
2081 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2082
2083 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2084
2085 iemNativeCondStartIfBlock(pReNative, off);
2086 return off;
2087}
2088
2089
2090#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2091
2092#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
2093 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
2094 do {
2095
2096/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
2097DECL_INLINE_THROW(uint32_t)
2098iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2099{
2100 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2101
2102 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
2103 kIemNativeGstRegUse_Calculation);
2104 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2105
2106 /* mov tmp0, mxcsr */
2107 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2108 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
2109 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
2110 /* mxcsr &= X86_MXCSR_XCPT_MASK */
2111 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
2112 /* mxcsr = ~mxcsr */
2113 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
2114 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
2115 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
2116 /* tmp0 &= mxcsr */
2117 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
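    /* tmp0 now holds the exception flags that are pending but not masked - the same
       computation as in the IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT emitter further
       up, except that here a non-zero result enters the if-block instead of raising. */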
2118
2119 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
2120 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
2121 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2122
2123 iemNativeCondStartIfBlock(pReNative, off);
2124 return off;
2125}
2126
2127#endif
2128
2129
2130/*********************************************************************************************************************************
2131* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2132*********************************************************************************************************************************/
2133
2134#define IEM_MC_NOREF(a_Name) \
2135 RT_NOREF_PV(a_Name)
2136
2137#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2138 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2139
2140#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2141 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2142
2143#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2144 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2145
2146#define IEM_MC_LOCAL(a_Type, a_Name) \
2147 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2148
2149#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2150 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2151
2152#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2153 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
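/* Illustrative (hypothetical) pairing in an MC block, just to show how these relate:
 *     IEM_MC_LOCAL(uint64_t, uValue);
 *     IEM_MC_ARG_LOCAL_REF(uint64_t *, puValue, uValue, 1);
 * Arguments are bound to fixed call argument slots (a_iArg), whereas plain locals only live
 * in the variable table until they are freed again. */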
2154
2155
2156/**
2157 * Sets the host register for @a idxVar to @a idxReg.
2158 *
2159 * The register must not be allocated. Any guest register shadowing will be
2160 * implictly dropped by this call.
2161 *
2162 * The variable must not have any register associated with it (causes
2163 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2164 * implied.
2165 *
2166 * @returns idxReg
2167 * @param pReNative The recompiler state.
2168 * @param idxVar The variable.
2169 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2170 * @param off For recording in debug info.
2171 *
2172 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2173 */
2174DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2175{
2176 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2177 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2178 Assert(!pVar->fRegAcquired);
2179 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2180 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2181 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2182
2183 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2184 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2185
2186 iemNativeVarSetKindToStack(pReNative, idxVar);
2187 pVar->idxReg = idxReg;
2188
2189 return idxReg;
2190}
2191
2192
2193/**
2194 * A convenient helper function.
2195 */
2196DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2197 uint8_t idxReg, uint32_t *poff)
2198{
2199 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2200 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2201 return idxReg;
2202}
2203
2204
2205/**
2206 * This is called by IEM_MC_END() to clean up all variables.
2207 */
2208DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2209{
2210 uint32_t const bmVars = pReNative->Core.bmVars;
2211 if (bmVars != 0)
2212 iemNativeVarFreeAllSlow(pReNative, bmVars);
2213 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2214 Assert(pReNative->Core.bmStack == 0);
2215}
2216
2217
2218#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2219
2220/**
2221 * This is called by IEM_MC_FREE_LOCAL.
2222 */
2223DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2224{
2225 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2226 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2227 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2228}
2229
2230
2231#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2232
2233/**
2234 * This is called by IEM_MC_FREE_ARG.
2235 */
2236DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2237{
2238 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2239 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2240 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2241}
2242
2243
2244#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2245
2246/**
2247 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2248 */
2249DECL_INLINE_THROW(uint32_t)
2250iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2251{
2252 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2253 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2254 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2255 Assert( pVarDst->cbVar == sizeof(uint16_t)
2256 || pVarDst->cbVar == sizeof(uint32_t));
2257
2258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2259 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2260 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2261 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2262 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2263
2264 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2265
2266 /*
2267 * Special case for immediates.
2268 */
2269 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2270 {
2271 switch (pVarDst->cbVar)
2272 {
2273 case sizeof(uint16_t):
2274 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2275 break;
2276 case sizeof(uint32_t):
2277 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2278 break;
2279 default: AssertFailed(); break;
2280 }
2281 }
2282 else
2283 {
2284 /*
2285 * The generic solution for now.
2286 */
2287 /** @todo optimize this by having the python script make sure the source
2288 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2289 * statement. Then we could just transfer the register assignments. */
2290 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2291 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2292 switch (pVarDst->cbVar)
2293 {
2294 case sizeof(uint16_t):
2295 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2296 break;
2297 case sizeof(uint32_t):
2298 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2299 break;
2300 default: AssertFailed(); break;
2301 }
2302 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2303 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2304 }
2305 return off;
2306}
2307
2308
2309
2310/*********************************************************************************************************************************
2311* Emitters for IEM_MC_CALL_CIMPL_XXX *
2312*********************************************************************************************************************************/
2313
2314/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2315DECL_INLINE_THROW(uint32_t)
2316iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2317 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2318
2319{
2320 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2321
2322#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2323 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2324 when a call clobbers any of the relevant control registers. */
2325# if 1
2326 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2327 {
2328 /* Likely as long as call+ret are done via cimpl. */
2329 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2330 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2331 }
2332 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2333 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2334 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2335 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2336 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2337 else
2338 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2339 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2340 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2341
2342# else
2343 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2344 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2345 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2346 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2347 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2348 || pfnCImpl == (uintptr_t)iemCImpl_callf
2349 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2350 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2351 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2352 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2353 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2354# endif
2355#endif
2356
2357 /*
2358 * Do all the call setup and cleanup.
2359 */
2360 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2361
2362 /*
2363 * Load the two or three hidden arguments.
2364 */
2365#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2366 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2367 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2368 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2369#else
2370 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2371 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2372#endif
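    /* Note: the Windows/AMD64 + VBOXSTRICTRC_STRICT_ENABLED case is special because
       VBOXSTRICTRC is a class there and gets returned via a hidden pointer argument, so a
       shadow stack slot is passed as the first argument and the status code is fetched back
       from it right after the call below. */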
2373
2374 /*
2375 * Make the call and check the return code.
2376 *
2377 * Shadow PC copies are always flushed here, other stuff depends on flags.
2378 * Segment and general purpose registers are explicitly flushed via the
2379 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2380 * macros.
2381 */
2382 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2383#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2384 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2385#endif
2386 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2387 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2388 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2389 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2390
2391 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2392}
2393
2394
2395#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2396 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2397
2398/** Emits code for IEM_MC_CALL_CIMPL_1. */
2399DECL_INLINE_THROW(uint32_t)
2400iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2401 uintptr_t pfnCImpl, uint8_t idxArg0)
2402{
2403 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2404 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2405}
2406
2407
2408#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2409 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2410
2411/** Emits code for IEM_MC_CALL_CIMPL_2. */
2412DECL_INLINE_THROW(uint32_t)
2413iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2414 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2415{
2416 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2417 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2418 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2419}
2420
2421
2422#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2423 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2424 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2425
2426/** Emits code for IEM_MC_CALL_CIMPL_3. */
2427DECL_INLINE_THROW(uint32_t)
2428iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2429 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2430{
2431 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2432 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2433 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2434 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2435}
2436
2437
2438#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2439 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2440 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2441
2442/** Emits code for IEM_MC_CALL_CIMPL_4. */
2443DECL_INLINE_THROW(uint32_t)
2444iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2445 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2446{
2447 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2448 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2449 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2450 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2451 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2452}
2453
2454
2455#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2456 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2457 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2458
2459/** Emits code for IEM_MC_CALL_CIMPL_5. */
2460DECL_INLINE_THROW(uint32_t)
2461iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2462 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2463{
2464 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2465 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2466 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2467 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2468 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2469 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2470}
2471
2472
2473/** Recompiler debugging: Flush guest register shadow copies. */
2474#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2475
2476
2477
2478/*********************************************************************************************************************************
2479* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2480*********************************************************************************************************************************/
2481
2482/**
2483 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2484 */
2485DECL_INLINE_THROW(uint32_t)
2486iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2487 uintptr_t pfnAImpl, uint8_t cArgs)
2488{
2489 if (idxVarRc != UINT8_MAX)
2490 {
2491 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2492 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2493 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2494 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2495 }
2496
2497 /*
2498 * Do all the call setup and cleanup.
2499 *
2500 * It is only required to flush pending guest register writes in call volatile registers, as
2501 * the assembly helpers can't throw and don't access anything living in CPUMCTX; they only
2502 * access their parameters. The flushing of call volatile registers is always done by
2503 * iemNativeEmitCallCommon(), regardless of the fFlushPendingWrites parameter.
2504 */
2505 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
2506
2507 /*
2508 * Make the call and update the return code variable if we've got one.
2509 */
2510 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2511 if (idxVarRc != UINT8_MAX)
2512 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2513
2514 return off;
2515}
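/*
 * Illustration (hypothetical helper and names, not taken from the real code): a
 * MC block doing
 *      IEM_MC_CALL_AIMPL_2(u64Rc, pfnMyHelper, u64Arg0, u64Arg1);
 * with a helper of the shape
 *      uint64_t pfnMyHelper(uint64_t uArg0, uint64_t uArg1);
 * ends up here with cArgs=2: iemNativeEmitCallCommon() moves the two argument
 * variables into the first call-volatile argument registers, iemNativeEmitCallImm()
 * emits the actual call, and u64Rc is afterwards simply bound to
 * IEMNATIVE_CALL_RET_GREG (RAX on AMD64, x0 on ARM64).
 */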
2516
2517
2518
2519#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2520 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2521
2522#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2523 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2524
2525/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2526DECL_INLINE_THROW(uint32_t)
2527iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2528{
2529 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2530}
2531
2532
2533#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2534 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2535
2536#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2537 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2538
2539/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2540DECL_INLINE_THROW(uint32_t)
2541iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2542{
2543 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2544 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2545}
2546
2547
2548#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2549 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2550
2551#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2552 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2553
2554/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2555DECL_INLINE_THROW(uint32_t)
2556iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2557 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2558{
2559 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2560 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2561 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2562}
2563
2564
2565#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2566 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2567
2568#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2569 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2570
2571/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2572DECL_INLINE_THROW(uint32_t)
2573iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2574 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2575{
2576 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2577 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2578 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2579 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2580}
2581
2582
2583#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2584 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2585
2586#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2587 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2588
2589/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2590DECL_INLINE_THROW(uint32_t)
2591iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2592 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2593{
2594 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2595 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2596 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2597 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2598 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2599}
2600
2601
2602
2603/*********************************************************************************************************************************
2604* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2605*********************************************************************************************************************************/
2606
2607#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2608 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2609
2610#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2611 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2612
2613#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2614 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2615
2616#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2617 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2618
2619
2620/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2621 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2622DECL_INLINE_THROW(uint32_t)
2623iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2624{
2625 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2626 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2627 Assert(iGRegEx < 20);
2628
2629 /* Same discussion as in iemNativeEmitFetchGregU16 */
2630 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2631 kIemNativeGstRegUse_ReadOnly);
2632
2633 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2634 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2635
2636 /* The value is zero-extended to the full 64-bit host register width. */
2637 if (iGRegEx < 16)
2638 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2639 else
2640 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2641
2642 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2643 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2644 return off;
2645}
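/*
 * Illustration of the emitter above (assuming the threaded encoding where
 * iGRegEx values 16..19 select AH, CH, DH and BH): fetching AH means
 * iGRegEx = 16, so the shadow of guest RAX (iGRegEx & 15) is used and the
 * Gpr8Hi helper extracts bits 15:8 zero-extended, roughly
 *      movzx ecx, ah           ; AMD64 (or an equivalent shift + zero-extend)
 *      ubfx  w1, w2, #8, #8    ; ARM64
 * while iGRegEx < 16 takes the plain low-byte zero-extending load.
 */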
2646
2647
2648#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2649 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2650
2651#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2652 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2653
2654#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2655 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2656
2657/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2658DECL_INLINE_THROW(uint32_t)
2659iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2660{
2661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2663 Assert(iGRegEx < 20);
2664
2665 /* Same discussion as in iemNativeEmitFetchGregU16 */
2666 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2667 kIemNativeGstRegUse_ReadOnly);
2668
2669 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2670 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2671
2672 if (iGRegEx < 16)
2673 {
2674 switch (cbSignExtended)
2675 {
2676 case sizeof(uint16_t):
2677 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2678 break;
2679 case sizeof(uint32_t):
2680 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2681 break;
2682 case sizeof(uint64_t):
2683 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2684 break;
2685 default: AssertFailed(); break;
2686 }
2687 }
2688 else
2689 {
2690 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2691 switch (cbSignExtended)
2692 {
2693 case sizeof(uint16_t):
2694 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2695 break;
2696 case sizeof(uint32_t):
2697 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2698 break;
2699 case sizeof(uint64_t):
2700 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2701 break;
2702 default: AssertFailed(); break;
2703 }
2704 }
2705
2706 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2707 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2708 return off;
2709}
2710
2711
2712
2713#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2714 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2715
2716#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2717 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2718
2719#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2720 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2721
2722/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2723DECL_INLINE_THROW(uint32_t)
2724iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2725{
2726 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2727 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2728 Assert(iGReg < 16);
2729
2730 /*
2731 * We can either just load the low 16-bit of the GPR into a host register
2732 * for the variable, or we can do so via a shadow copy host register. The
2733 * latter will avoid having to reload it if it's being stored later, but
2734 * will waste a host register if it isn't touched again. Since we don't
2735 * know what's going to happen, we choose the latter for now.
2736 */
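/*
 * Rough sketch of the two options (illustrative only, the real encodings come
 * from the emitter helpers; <pVCpu> stands for the fixed VMCPU pointer register):
 *      movzx ecx, word [<pVCpu> + offsetof cpum.GstCtx.aGRegs[iGReg]]   ; direct 16-bit load
 * versus the shadow-copy route used below:
 *      mov   rax, qword [<pVCpu> + offsetof cpum.GstCtx.aGRegs[iGReg]]  ; load/reuse full 64-bit shadow
 *      movzx ecx, ax                                                    ; copy low 16 bits to the variable
 * The same trade-off applies to the 8-bit and 32-bit fetchers in this file.
 */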
2737 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2738 kIemNativeGstRegUse_ReadOnly);
2739
2740 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2741 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2742 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2743 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2744
2745 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2746 return off;
2747}
2748
2749
2750#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2751 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2752
2753#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2754 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2755
2756/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2757DECL_INLINE_THROW(uint32_t)
2758iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2759{
2760 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2761 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2762 Assert(iGReg < 16);
2763
2764 /*
2765 * We can either just load the low 16-bit of the GPR into a host register
2766 * for the variable, or we can do so via a shadow copy host register. The
2767 * latter will avoid having to reload it if it's being stored later, but
2768 * will waste a host register if it isn't touched again. Since we don't
2769 * know what's going to happen, we choose the latter for now.
2770 */
2771 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2772 kIemNativeGstRegUse_ReadOnly);
2773
2774 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2775 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2776 if (cbSignExtended == sizeof(uint32_t))
2777 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2778 else
2779 {
2780 Assert(cbSignExtended == sizeof(uint64_t));
2781 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2782 }
2783 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2784
2785 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2786 return off;
2787}
2788
2789
2790#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2791 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2792
2793#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2794 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2795
2796/** Emits code for IEM_MC_FETCH_GREG_U32. */
2797DECL_INLINE_THROW(uint32_t)
2798iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2799{
2800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2801 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2802 Assert(iGReg < 16);
2803
2804 /*
2805 * We can either just load the low 32-bit of the GPR into a host register
2806 * for the variable, or we can do so via a shadow copy host register. The
2807 * latter will avoid having to reload it if it's being stored later, but
2808 * will waste a host register if it isn't touched again. Since we don't
2809 * know what's going to happen, we choose the latter for now.
2810 */
2811 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2812 kIemNativeGstRegUse_ReadOnly);
2813
2814 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2815 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2816 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2817 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2818
2819 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2820 return off;
2821}
2822
2823
2824#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2825 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2826
2827/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2828DECL_INLINE_THROW(uint32_t)
2829iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2830{
2831 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2832 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2833 Assert(iGReg < 16);
2834
2835 /*
2836 * We can either just load the low 32-bit of the GPR into a host register
2837 * for the variable, or we can do so via a shadow copy host register. The
2838 * latter will avoid having to reload it if it's being stored later, but
2839 * will waste a host register if it isn't touched again. Since we don't
2840 * know what's going to happen, we choose the latter for now.
2841 */
2842 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2843 kIemNativeGstRegUse_ReadOnly);
2844
2845 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2846 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2847 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2848 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2849
2850 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2851 return off;
2852}
2853
2854
2855#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2856 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2857
2858#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2859 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2860
2861/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2862 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2863DECL_INLINE_THROW(uint32_t)
2864iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2865{
2866 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2867 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2868 Assert(iGReg < 16);
2869
2870 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2871 kIemNativeGstRegUse_ReadOnly);
2872
2873 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2874 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2875 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2876 /** @todo name the register a shadow one already? */
2877 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2878
2879 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2880 return off;
2881}
2882
2883
2884#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2885#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
2886 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
2887
2888/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
2889DECL_INLINE_THROW(uint32_t)
2890iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
2891{
2892 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2893 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
2894 Assert(iGRegLo < 16 && iGRegHi < 16);
2895
2896 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
2897 kIemNativeGstRegUse_ReadOnly);
2898 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
2899 kIemNativeGstRegUse_ReadOnly);
2900
2901 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2902 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
2903 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
2904 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
2905
2906 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
2907 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
2908 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
2909 return off;
2910}
2911#endif
2912
2913
2914/*********************************************************************************************************************************
2915* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2916*********************************************************************************************************************************/
2917
2918#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2919 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2920
2921/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2922DECL_INLINE_THROW(uint32_t)
2923iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2924{
2925 Assert(iGRegEx < 20);
2926 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2927 kIemNativeGstRegUse_ForUpdate);
2928#ifdef RT_ARCH_AMD64
2929 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2930
2931 /* To the lowest byte of the register: mov r8, imm8 */
2932 if (iGRegEx < 16)
2933 {
2934 if (idxGstTmpReg >= 8)
2935 pbCodeBuf[off++] = X86_OP_REX_B;
2936 else if (idxGstTmpReg >= 4)
2937 pbCodeBuf[off++] = X86_OP_REX;
2938 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2939 pbCodeBuf[off++] = u8Value;
2940 }
2941    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
2942 else if (idxGstTmpReg < 4)
2943 {
2944 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2945 pbCodeBuf[off++] = u8Value;
2946 }
2947 else
2948 {
2949 /* ror reg64, 8 */
2950 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2951 pbCodeBuf[off++] = 0xc1;
2952 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2953 pbCodeBuf[off++] = 8;
2954
2955 /* mov reg8, imm8 */
2956 if (idxGstTmpReg >= 8)
2957 pbCodeBuf[off++] = X86_OP_REX_B;
2958 else if (idxGstTmpReg >= 4)
2959 pbCodeBuf[off++] = X86_OP_REX;
2960 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2961 pbCodeBuf[off++] = u8Value;
2962
2963 /* rol reg64, 8 */
2964 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2965 pbCodeBuf[off++] = 0xc1;
2966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2967 pbCodeBuf[off++] = 8;
2968 }
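    /*
     * Rough illustration of the rotate path above, assuming guest RAX is
     * shadowed in host register r10 (no high-byte encoding possible) and the
     * target is AH:
     *      ror r10, 8          ; bring bits 15:8 (AH) down to bits 7:0
     *      mov r10b, imm8      ; write the constant into the low byte
     *      rol r10, 8          ; rotate the register back into place
     */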
2969
2970#elif defined(RT_ARCH_ARM64)
2971 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2972 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2973 if (iGRegEx < 16)
2974 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2975 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2976 else
2977 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2978 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2979 iemNativeRegFreeTmp(pReNative, idxImmReg);
2980
2981#else
2982# error "Port me!"
2983#endif
2984
2985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2986
2987#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2988 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2989#endif
2990
2991 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2992 return off;
2993}
2994
2995
2996#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2997 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2998
2999/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
3000DECL_INLINE_THROW(uint32_t)
3001iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
3002{
3003 Assert(iGRegEx < 20);
3004 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3005
3006 /*
3007 * If it's a constant value (unlikely), we treat this as an
3008 * IEM_MC_STORE_GREG_U8_CONST statement.
3009 */
3010 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3011 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3012 { /* likely */ }
3013 else
3014 {
3015 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3016 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3017 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
3018 }
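    /*
     * Illustrative (hypothetical MC fragment): something like
     *      IEM_MC_LOCAL_CONST(uint8_t, u8Value, 0x42);
     *      IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, u8Value);
     * gives a kIemNativeVarKind_Immediate variable and is forwarded to
     * iemNativeEmitStoreGregU8Const() above; only stack-kind variables take
     * the register path that follows.
     */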
3019
3020 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3021 kIemNativeGstRegUse_ForUpdate);
3022 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3023
3024#ifdef RT_ARCH_AMD64
3025 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
3026 if (iGRegEx < 16)
3027 {
3028 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3029 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3030 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3031 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3032 pbCodeBuf[off++] = X86_OP_REX;
3033 pbCodeBuf[off++] = 0x8a;
3034 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3035 }
3036    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
3037 else if (idxGstTmpReg < 4 && idxVarReg < 4)
3038 {
3039 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
3040 pbCodeBuf[off++] = 0x8a;
3041 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
3042 }
3043 else
3044 {
3045 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
3046
3047 /* ror reg64, 8 */
3048 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3049 pbCodeBuf[off++] = 0xc1;
3050 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3051 pbCodeBuf[off++] = 8;
3052
3053 /* mov reg8, reg8(r/m) */
3054 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3055 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3056 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3057 pbCodeBuf[off++] = X86_OP_REX;
3058 pbCodeBuf[off++] = 0x8a;
3059 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3060
3061 /* rol reg64, 8 */
3062 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3063 pbCodeBuf[off++] = 0xc1;
3064 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3065 pbCodeBuf[off++] = 8;
3066 }
3067
3068#elif defined(RT_ARCH_ARM64)
3069 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
3070 or
3071 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
3072 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3073 if (iGRegEx < 16)
3074 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
3075 else
3076 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
3077
3078#else
3079# error "Port me!"
3080#endif
3081 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3082
3083 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3084
3085#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3086 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
3087#endif
3088 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3089 return off;
3090}
3091
3092
3093
3094#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
3095 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
3096
3097/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
3098DECL_INLINE_THROW(uint32_t)
3099iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
3100{
3101 Assert(iGReg < 16);
3102 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3103 kIemNativeGstRegUse_ForUpdate);
3104#ifdef RT_ARCH_AMD64
3105 /* mov reg16, imm16 */
3106 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
3107 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3108 if (idxGstTmpReg >= 8)
3109 pbCodeBuf[off++] = X86_OP_REX_B;
3110 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
3111 pbCodeBuf[off++] = RT_BYTE1(uValue);
3112 pbCodeBuf[off++] = RT_BYTE2(uValue);
3113
3114#elif defined(RT_ARCH_ARM64)
3115 /* movk xdst, #uValue, lsl #0 */
3116 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3117 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3118
3119#else
3120# error "Port me!"
3121#endif
3122
3123 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3124
3125#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3126 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3127#endif
3128 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3129 return off;
3130}
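/*
 * Sketch of what the above generates for, say, storing 0x1234 when the guest
 * register is shadowed in host rcx / x20 (register choice is illustrative):
 *      66 b9 34 12             ; AMD64: mov cx, 0x1234 - bits 63:16 untouched
 *      movk x20, #0x1234       ; ARM64: insert imm16 at bits 15:0, rest kept
 * matching the x86 semantics of a 16-bit GPR write.
 */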
3131
3132
3133#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3134 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3135
3136/** Emits code for IEM_MC_STORE_GREG_U16. */
3137DECL_INLINE_THROW(uint32_t)
3138iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3139{
3140 Assert(iGReg < 16);
3141 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3142
3143 /*
3144 * If it's a constant value (unlikely), we treat this as an
3145 * IEM_MC_STORE_GREG_U16_CONST statement.
3146 */
3147 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3148 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3149 { /* likely */ }
3150 else
3151 {
3152 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3153 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3154 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3155 }
3156
3157 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3158 kIemNativeGstRegUse_ForUpdate);
3159
3160#ifdef RT_ARCH_AMD64
3161 /* mov reg16, reg16 or [mem16] */
3162 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3163 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3164 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3165 {
3166 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3167 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3168 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3169 pbCodeBuf[off++] = 0x8b;
3170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3171 }
3172 else
3173 {
3174 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3175 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3176 if (idxGstTmpReg >= 8)
3177 pbCodeBuf[off++] = X86_OP_REX_R;
3178 pbCodeBuf[off++] = 0x8b;
3179 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3180 }
3181
3182#elif defined(RT_ARCH_ARM64)
3183 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3184 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3185 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3186 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3187 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3188
3189#else
3190# error "Port me!"
3191#endif
3192
3193 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3194
3195#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3196 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3197#endif
3198 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3199 return off;
3200}
3201
3202
3203#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3204 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3205
3206/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3207DECL_INLINE_THROW(uint32_t)
3208iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3209{
3210 Assert(iGReg < 16);
3211 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3212 kIemNativeGstRegUse_ForFullWrite);
3213 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3214#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3215 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3216#endif
3217 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3218 return off;
3219}
3220
3221
3222#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3223 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3224
3225/** Emits code for IEM_MC_STORE_GREG_U32. */
3226DECL_INLINE_THROW(uint32_t)
3227iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3228{
3229 Assert(iGReg < 16);
3230 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3231
3232 /*
3233 * If it's a constant value (unlikely), we treat this as an
3234 * IEM_MC_STORE_GREG_U32_CONST statement.
3235 */
3236 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3237 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3238 { /* likely */ }
3239 else
3240 {
3241 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3242 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3243 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3244 }
3245
3246 /*
3247 * For the rest we allocate a guest register for the variable and write
3248 * it to the CPUMCTX structure.
3249 */
3250 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3251#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3252 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3253#else
3254 RT_NOREF(idxVarReg);
3255#endif
3256#ifdef VBOX_STRICT
3257 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3258#endif
3259 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3260 return off;
3261}
3262
3263
3264#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3265 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3266
3267/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3268DECL_INLINE_THROW(uint32_t)
3269iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3270{
3271 Assert(iGReg < 16);
3272 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3273 kIemNativeGstRegUse_ForFullWrite);
3274 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3275#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3276 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3277#endif
3278 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3279 return off;
3280}
3281
3282
3283#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3284 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3285
3286#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3287 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3288
3289/** Emits code for IEM_MC_STORE_GREG_U64. */
3290DECL_INLINE_THROW(uint32_t)
3291iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3292{
3293 Assert(iGReg < 16);
3294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3295
3296 /*
3297 * If it's a constant value (unlikely), we treat this as an
3298 * IEM_MC_STORE_GREG_U64_CONST statement.
3299 */
3300 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3301 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3302 { /* likely */ }
3303 else
3304 {
3305 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3307 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3308 }
3309
3310 /*
3311 * For the rest we allocate a guest register for the variable and write
3312 * it to the CPUMCTX structure.
3313 */
3314 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3315#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3316 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3317#else
3318 RT_NOREF(idxVarReg);
3319#endif
3320 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3321 return off;
3322}
3323
3324
3325#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3326 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3327
3328/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3329DECL_INLINE_THROW(uint32_t)
3330iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3331{
3332 Assert(iGReg < 16);
3333 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3334 kIemNativeGstRegUse_ForUpdate);
3335 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3336#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3337 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3338#endif
3339 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3340 return off;
3341}
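/*
 * Note: the 32-bit register-to-itself move above relies on both hosts
 * zero-extending 32-bit writes, e.g. (host register names illustrative)
 *      mov ecx, ecx        ; AMD64: clears bits 63:32 of rcx
 *      mov w20, w20        ; ARM64: clears bits 63:32 of x20
 * which is exactly the x86 rule this MC statement implements.
 */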
3342
3343
3344#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3345#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
3346 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
3347
3348/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
3349DECL_INLINE_THROW(uint32_t)
3350iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
3351{
3352 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3353 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3354 Assert(iGRegLo < 16 && iGRegHi < 16);
3355
3356 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3357 kIemNativeGstRegUse_ForFullWrite);
3358 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3359 kIemNativeGstRegUse_ForFullWrite);
3360
3361 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3362 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
3363 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
3364 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
3365
3366 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3367 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3368 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3369 return off;
3370}
3371#endif
3372
3373
3374/*********************************************************************************************************************************
3375* General purpose register manipulation (add, sub). *
3376*********************************************************************************************************************************/
3377
3378#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3379 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3380
3381/** Emits code for IEM_MC_ADD_GREG_U16. */
3382DECL_INLINE_THROW(uint32_t)
3383iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3384{
3385 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3386 kIemNativeGstRegUse_ForUpdate);
3387
3388#ifdef RT_ARCH_AMD64
3389 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3390 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3391 if (idxGstTmpReg >= 8)
3392 pbCodeBuf[off++] = X86_OP_REX_B;
3393 if (uAddend == 1)
3394 {
3395 pbCodeBuf[off++] = 0xff; /* inc */
3396 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3397 }
3398 else
3399 {
3400 pbCodeBuf[off++] = 0x81;
3401 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3402 pbCodeBuf[off++] = uAddend;
3403 pbCodeBuf[off++] = 0;
3404 }
3405
3406#else
3407 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3408 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3409
3410    /* add tmp, gstgrp, uAddend */
3411 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3412
3413    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3414 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3415
3416 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3417#endif
3418
3419 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3420
3421#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3422 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3423#endif
3424
3425 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3426 return off;
3427}
3428
3429
3430#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3431 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3432
3433#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3434 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3435
3436/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3437DECL_INLINE_THROW(uint32_t)
3438iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3439{
3440 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3441 kIemNativeGstRegUse_ForUpdate);
3442
3443#ifdef RT_ARCH_AMD64
3444 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3445 if (f64Bit)
3446 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3447 else if (idxGstTmpReg >= 8)
3448 pbCodeBuf[off++] = X86_OP_REX_B;
3449 if (uAddend == 1)
3450 {
3451 pbCodeBuf[off++] = 0xff; /* inc */
3452 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3453 }
3454 else if (uAddend < 128)
3455 {
3456 pbCodeBuf[off++] = 0x83; /* add */
3457 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3458 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3459 }
3460 else
3461 {
3462 pbCodeBuf[off++] = 0x81; /* add */
3463 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3464 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3465 pbCodeBuf[off++] = 0;
3466 pbCodeBuf[off++] = 0;
3467 pbCodeBuf[off++] = 0;
3468 }
3469
3470#else
3471    /* add tmp, gstgrp, uAddend */
3472 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3473 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3474
3475#endif
3476
3477 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3478
3479#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3480 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3481#endif
3482
3483 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3484 return off;
3485}
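/*
 * Why the uAddend < 128 split in the AMD64 path above: opcode 0x83 sign-extends
 * its 8-bit immediate, so it is only usable while the addend is a non-negative
 * imm8. Illustrative 64-bit encodings for a guest register shadowed in rcx:
 *      add rcx, 1      -> 48 ff c1                (inc)
 *      add rcx, 0x7f   -> 48 83 c1 7f             (sign-extended imm8)
 *      add rcx, 0x80   -> 48 81 c1 80 00 00 00    (imm32; imm8 would add -128)
 */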
3486
3487
3488
3489#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3490 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3491
3492/** Emits code for IEM_MC_SUB_GREG_U16. */
3493DECL_INLINE_THROW(uint32_t)
3494iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3495{
3496 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3497 kIemNativeGstRegUse_ForUpdate);
3498
3499#ifdef RT_ARCH_AMD64
3500 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3501 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3502 if (idxGstTmpReg >= 8)
3503 pbCodeBuf[off++] = X86_OP_REX_B;
3504 if (uSubtrahend == 1)
3505 {
3506 pbCodeBuf[off++] = 0xff; /* dec */
3507 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3508 }
3509 else
3510 {
3511 pbCodeBuf[off++] = 0x81;
3512 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3513 pbCodeBuf[off++] = uSubtrahend;
3514 pbCodeBuf[off++] = 0;
3515 }
3516
3517#else
3518 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3519 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3520
3521 /* sub tmp, gstgrp, uSubtrahend */
3522 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3523
3524    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3525 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3526
3527 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3528#endif
3529
3530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3531
3532#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3533 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3534#endif
3535
3536 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3537 return off;
3538}
3539
3540
3541#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3542 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3543
3544#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3545 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3546
3547/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3548DECL_INLINE_THROW(uint32_t)
3549iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3550{
3551 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3552 kIemNativeGstRegUse_ForUpdate);
3553
3554#ifdef RT_ARCH_AMD64
3555 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3556 if (f64Bit)
3557 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3558 else if (idxGstTmpReg >= 8)
3559 pbCodeBuf[off++] = X86_OP_REX_B;
3560 if (uSubtrahend == 1)
3561 {
3562 pbCodeBuf[off++] = 0xff; /* dec */
3563 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3564 }
3565 else if (uSubtrahend < 128)
3566 {
3567 pbCodeBuf[off++] = 0x83; /* sub */
3568 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3569 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3570 }
3571 else
3572 {
3573 pbCodeBuf[off++] = 0x81; /* sub */
3574 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3575 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3576 pbCodeBuf[off++] = 0;
3577 pbCodeBuf[off++] = 0;
3578 pbCodeBuf[off++] = 0;
3579 }
3580
3581#else
3582 /* sub tmp, gstgrp, uSubtrahend */
3583 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3584 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3585
3586#endif
3587
3588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3589
3590#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3591 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3592#endif
3593
3594 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3595 return off;
3596}
3597
3598
3599#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3600 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3601
3602#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3603 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3604
3605#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3606 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3607
3608#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3609 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3610
3611/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3612DECL_INLINE_THROW(uint32_t)
3613iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3614{
3615#ifdef VBOX_STRICT
3616 switch (cbMask)
3617 {
3618 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3619 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3620 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3621 case sizeof(uint64_t): break;
3622 default: AssertFailedBreak();
3623 }
3624#endif
3625
3626 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3627 kIemNativeGstRegUse_ForUpdate);
3628
3629 switch (cbMask)
3630 {
3631 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3632 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3633 break;
3634 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3635 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3636 break;
3637 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3638 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3639 break;
3640 case sizeof(uint64_t):
3641 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3642 break;
3643 default: AssertFailedBreak();
3644 }
3645
3646 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3647
3648#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3649 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3650#endif
3651
3652 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3653 return off;
3654}
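/*
 * Worked example (illustrative): IEM_MC_AND_GREG_U16(X86_GREG_xBX, 0x00ff)
 * arrives here with uMask=0x00ff and cbMask=2, so the AND actually emitted uses
 * 0xffffffffffff00ff and bits 63:16 of the guest register survive, just like a
 * real 16-bit AND. The 32-bit case deliberately uses the plain 32-bit AND so
 * that bits 63:32 are zeroed, again matching x86 semantics.
 */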
3655
3656
3657#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3658 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3659
3660#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3661 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3662
3663#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3664 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3665
3666#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3667 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3668
3669/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3670DECL_INLINE_THROW(uint32_t)
3671iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3672{
3673#ifdef VBOX_STRICT
3674 switch (cbMask)
3675 {
3676 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3677 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3678 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3679 case sizeof(uint64_t): break;
3680 default: AssertFailedBreak();
3681 }
3682#endif
3683
3684 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3685 kIemNativeGstRegUse_ForUpdate);
3686
3687 switch (cbMask)
3688 {
3689 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3690 case sizeof(uint16_t):
3691 case sizeof(uint64_t):
3692 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3693 break;
3694 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3695 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3696 break;
3697 default: AssertFailedBreak();
3698 }
3699
3700 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3701
3702#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3703 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3704#endif
3705
3706 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3707 return off;
3708}
3709
3710
3711/*********************************************************************************************************************************
3712* Local/Argument variable manipulation (add, sub, and, or). *
3713*********************************************************************************************************************************/
3714
3715#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3716 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3717
3718#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3719 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3720
3721#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3722 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3723
3724#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3725 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3726
3727
3728#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3729 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3730
3731#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3732 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3733
3734#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3735 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3736
3737/** Emits code for AND'ing a local and a constant value. */
3738DECL_INLINE_THROW(uint32_t)
3739iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3740{
3741#ifdef VBOX_STRICT
3742 switch (cbMask)
3743 {
3744 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3745 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3746 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3747 case sizeof(uint64_t): break;
3748 default: AssertFailedBreak();
3749 }
3750#endif
3751
3752 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3753 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3754
3755 if (cbMask <= sizeof(uint32_t))
3756 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3757 else
3758 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3759
3760 iemNativeVarRegisterRelease(pReNative, idxVar);
3761 return off;
3762}
3763
3764
3765#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3766 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3767
3768#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3769 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3770
3771#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3772 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3773
3774#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3775 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3776
3777/** Emits code for OR'ing a local and a constant value. */
3778DECL_INLINE_THROW(uint32_t)
3779iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3780{
3781#ifdef VBOX_STRICT
3782 switch (cbMask)
3783 {
3784 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3785 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3786 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3787 case sizeof(uint64_t): break;
3788 default: AssertFailedBreak();
3789 }
3790#endif
3791
3792 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3793 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3794
3795 if (cbMask <= sizeof(uint32_t))
3796 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3797 else
3798 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3799
3800 iemNativeVarRegisterRelease(pReNative, idxVar);
3801 return off;
3802}
3803
3804
3805#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3806 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3807
3808#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3809 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3810
3811#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3812 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3813
3814/** Emits code for reversing the byte order in a local value. */
3815DECL_INLINE_THROW(uint32_t)
3816iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3817{
3818 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3820
3821 switch (cbLocal)
3822 {
3823 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3824 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3825 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3826 default: AssertFailedBreak();
3827 }
3828
3829 iemNativeVarRegisterRelease(pReNative, idxVar);
3830 return off;
3831}
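
/*
 * Illustrative note, not part of the emitter: for a 32-bit local holding
 * 0x12345678 the emitted swap (iemNativeEmitBswapGpr32) leaves 0x78563412 in
 * the variable's host register, while the 16-bit variant only exchanges the low
 * two bytes via iemNativeEmitBswapGpr16. A hypothetical caller:
 *
 *      IEM_MC_BSWAP_LOCAL_U32(u32Value);   // -> iemNativeEmitBswapLocal(..., sizeof(uint32_t))
 */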
3832
3833
3834#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3835 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3836
3837#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3838 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3839
3840#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3841 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3842
3843/** Emits code for shifting left a local value. */
3844DECL_INLINE_THROW(uint32_t)
3845iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3846{
3847#ifdef VBOX_STRICT
3848 switch (cbLocal)
3849 {
3850 case sizeof(uint8_t): Assert(cShift < 8); break;
3851 case sizeof(uint16_t): Assert(cShift < 16); break;
3852 case sizeof(uint32_t): Assert(cShift < 32); break;
3853 case sizeof(uint64_t): Assert(cShift < 64); break;
3854 default: AssertFailedBreak();
3855 }
3856#endif
3857
3858 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3859 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3860
3861 if (cbLocal <= sizeof(uint32_t))
3862 {
3863 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3864 if (cbLocal < sizeof(uint32_t))
3865 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3866 cbLocal == sizeof(uint16_t)
3867 ? UINT32_C(0xffff)
3868 : UINT32_C(0xff));
3869 }
3870 else
3871 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3872
3873 iemNativeVarRegisterRelease(pReNative, idxVar);
3874 return off;
3875}
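
/*
 * Illustrative note, not part of the emitter: the post-shift AND above keeps a
 * sub-32-bit local canonical, since the shift is performed in a 32-bit host
 * register. E.g. a 16-bit local holding 0x8001 shifted left by 1 yields
 * 0x00010002 in the register; the AND with 0xffff reduces it to 0x0002, which
 * is the 16-bit result the guest code expects.
 */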
3876
3877
3878#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3879 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3880
3881#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3882 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3883
3884#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3885 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3886
3887/** Emits code for arithmetically shifting right a local value. */
3888DECL_INLINE_THROW(uint32_t)
3889iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3890{
3891#ifdef VBOX_STRICT
3892 switch (cbLocal)
3893 {
3894 case sizeof(int8_t): Assert(cShift < 8); break;
3895 case sizeof(int16_t): Assert(cShift < 16); break;
3896 case sizeof(int32_t): Assert(cShift < 32); break;
3897 case sizeof(int64_t): Assert(cShift < 64); break;
3898 default: AssertFailedBreak();
3899 }
3900#endif
3901
3902 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3903 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3904
3905 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3906 if (cbLocal == sizeof(uint8_t))
3907 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3908 else if (cbLocal == sizeof(uint16_t))
3909 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3910
3911 if (cbLocal <= sizeof(uint32_t))
3912 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3913 else
3914 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3915
3916 iemNativeVarRegisterRelease(pReNative, idxVar);
3917 return off;
3918}
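
/*
 * Illustrative note, not part of the emitter: the sign extension above is what
 * makes the arithmetic shift correct for narrow locals. E.g. a 16-bit local
 * holding 0xff80 (-128) is first sign-extended to 0xffffff80; shifting that
 * right arithmetically by 4 gives 0xfffffff8, whose low 16 bits (0xfff8, i.e.
 * -8) are the expected 16-bit result. Without the extension the shift would
 * operate on 0x0000ff80 and produce a positive value.
 */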
3919
3920
3921#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3922 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3923
3924#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3925 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3926
3927#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3928 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3929
3930/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3931DECL_INLINE_THROW(uint32_t)
3932iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3933{
3934 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3935 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3937 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3938
3939 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3940 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3941
3942 /* Need to sign extend the value. */
3943 if (cbLocal <= sizeof(uint32_t))
3944 {
3945/** @todo ARM64: In case of boredom, the extended add instruction can do the
3946 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3947 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3948
3949 switch (cbLocal)
3950 {
3951 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3952 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3953 default: AssertFailed();
3954 }
3955
3956 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3957 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3958 }
3959 else
3960 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3961
3962 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3963 iemNativeVarRegisterRelease(pReNative, idxVar);
3964 return off;
3965}
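
/*
 * Illustrative note, not part of the emitter: the sign extension above matters
 * for negative locals. E.g. adding a 16-bit local holding 0xfffe (-2) to an
 * effective address of 0x1000 must yield 0xffe, so the value is widened to
 * 0xfffffffffffffffe before the 64-bit add; adding the raw 0xfffe instead would
 * produce 0x10ffe.
 */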
3966
3967
3968
3969/*********************************************************************************************************************************
3970* EFLAGS *
3971*********************************************************************************************************************************/
3972
3973#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3974# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3975#else
3976# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3977 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3978
3979DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3980{
3981 if (fEflOutput)
3982 {
3983 PVMCPUCC const pVCpu = pReNative->pVCpu;
3984# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3985 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3986 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3987 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3988# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3989 if (fEflOutput & (a_fEfl)) \
3990 { \
3991 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3992 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3993 else \
3994 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3995 } else do { } while (0)
3996# else
3997 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3998 IEMLIVENESSBIT const LivenessClobbered =
3999 {
4000 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4001 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4002 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
4003 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
4004 };
4005 IEMLIVENESSBIT const LivenessDelayable =
4006 {
4007 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4008 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
4009 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4010 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
4011 };
4012# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
4013 if (fEflOutput & (a_fEfl)) \
4014 { \
4015 if (LivenessClobbered.a_fLivenessMember) \
4016 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
4017 else if (LivenessDelayable.a_fLivenessMember) \
4018 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
4019 else \
4020 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
4021 } else do { } while (0)
4022# endif
4023 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
4024 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
4025 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
4026 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
4027 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
4028 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
4029 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
4030# undef CHECK_FLAG_AND_UPDATE_STATS
4031 }
4032 RT_NOREF(fEflInput);
4033}
4034#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4035
4036#undef IEM_MC_FETCH_EFLAGS /* should not be used */
4037#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4038 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
4039
4040/** Handles IEM_MC_FETCH_EFLAGS_EX. */
4041DECL_INLINE_THROW(uint32_t)
4042iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
4043 uint32_t fEflInput, uint32_t fEflOutput)
4044{
4045 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
4046 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4047 RT_NOREF(fEflInput, fEflOutput);
4048
4049#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4050# ifdef VBOX_STRICT
4051 if ( pReNative->idxCurCall != 0
4052 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
4053 {
4054 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
4055 uint32_t const fBoth = fEflInput | fEflOutput;
4056# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
4057 AssertMsg( !(fBoth & (a_fElfConst)) \
4058 || (!(fEflInput & (a_fElfConst)) \
4059 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4060 : !(fEflOutput & (a_fElfConst)) \
4061 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4062 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
4063 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
4064 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
4065 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
4066 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
4067 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
4068 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
4069 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
4070 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
4071# undef ASSERT_ONE_EFL
4072 }
4073# endif
4074#endif
4075
4076 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4077
4078 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
4079 * the existing shadow copy. */
4080 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
4081 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4082 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
4083 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4084 return off;
4085}
4086
4087
4088
4089/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
4090 * start using it with custom native code emission (inlining assembly
4091 * instruction helpers). */
4092#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
4093#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4094 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4095 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
4096
4097#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
4098#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4099 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4100 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
4101
4102/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
4103DECL_INLINE_THROW(uint32_t)
4104iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
4105 bool fUpdateSkipping)
4106{
4107 RT_NOREF(fEflOutput);
4108 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
4109 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4110
4111#ifdef VBOX_STRICT
4112 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
4113 uint32_t offFixup = off;
4114 off = iemNativeEmitJnzToFixed(pReNative, off, off);
4115 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
4116 iemNativeFixupFixedJump(pReNative, offFixup, off);
4117
4118 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
4119 offFixup = off;
4120 off = iemNativeEmitJzToFixed(pReNative, off, off);
4121 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
4122 iemNativeFixupFixedJump(pReNative, offFixup, off);
4123
4124 /** @todo validate that only bits in the fEflOutput mask changed. */
4125#endif
4126
4127#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4128 if (fUpdateSkipping)
4129 {
4130 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4131 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4132 else
4133 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4134 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4135 }
4136#else
4137 RT_NOREF_PV(fUpdateSkipping);
4138#endif
4139
4140 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4141 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
4142 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4143 return off;
4144}
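
/*
 * Illustrative sketch, not part of the emitter: in strict builds the code
 * emitted above behaves roughly like the following C, using a breakpoint
 * instruction to flag corrupt EFLAGS values:
 *
 *      if (!(uEFlags & X86_EFL_RA1_MASK))                            // bit 1 must read as one
 *          breakpoint(0x2001);
 *      if (uEFlags & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32))  // reserved bits must be zero
 *          breakpoint(0x2002);
 *
 * (uEFlags and breakpoint() are stand-ins for the variable's host register and
 * the emitted iemNativeEmitBrk instruction.)
 */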
4145
4146
4147
4148/*********************************************************************************************************************************
4149* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4150*********************************************************************************************************************************/
4151
4152#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4153 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4154
4155#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4156 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4157
4158#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4159 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4160
4161
4162/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4163 * IEM_MC_FETCH_SREG_ZX_U64. */
4164DECL_INLINE_THROW(uint32_t)
4165iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4166{
4167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4168 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4169 Assert(iSReg < X86_SREG_COUNT);
4170
4171 /*
4172 * For now, we will not create a shadow copy of a selector. The rationale
4173 * is that since we do not recompile the popping and loading of segment
4174 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
4175 * pushing and moving to registers, there is only a small chance that the
4176 * shadow copy will be accessed again before the register is reloaded. One
4177 * scenario would be nested calls in 16-bit code, but I doubt it's worth
4178 * the extra register pressure atm.
4179 *
4180 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4181 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
4182 * store scenario covered at present (r160730).
4183 */
4184 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4185 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4186 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4187 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4188 return off;
4189}
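
/*
 * Illustrative note, not part of the emitter: all three widths are served by
 * the same 16-bit load above, e.g.
 *
 *      IEM_MC_FETCH_SREG_ZX_U64(u64Sel, X86_SREG_SS);
 *      // loads cpum.GstCtx.aSRegs[X86_SREG_SS].Sel via iemNativeEmitLoadGprFromVCpuU16,
 *      // which presumably zero-extends into the full host register.
 *
 * (u64Sel is a made-up local for illustration.)
 */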
4190
4191
4192
4193/*********************************************************************************************************************************
4194* Register references. *
4195*********************************************************************************************************************************/
4196
4197#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4198 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4199
4200#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4201 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4202
4203/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4204DECL_INLINE_THROW(uint32_t)
4205iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4206{
4207 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4208 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4209 Assert(iGRegEx < 20);
4210
4211 if (iGRegEx < 16)
4212 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4213 else
4214 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4215
4216 /* If we've delayed writing back the register value, flush it now. */
4217 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4218
4219 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4220 if (!fConst)
4221 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4222
4223 return off;
4224}
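
/*
 * Illustrative note, not part of the emitter: iGRegEx values 16..19 presumably
 * encode the legacy high-byte registers (AH, CH, DH, BH), which is why they get
 * the kIemNativeGstRegRef_GprHighByte reference kind while the flushes still
 * target the underlying GPR via iGRegEx & 15. E.g. a_iGRegEx == 16 would refer
 * to AH, whose storage is GPR 0 (xAX), so both flush calls use index 0.
 */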
4225
4226#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4227 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4228
4229#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4230 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4231
4232#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4233 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4234
4235#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4236 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4237
4238#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4239 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4240
4241#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4242 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4243
4244#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4245 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4246
4247#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4248 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4249
4250#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4251 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4252
4253#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4254 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4255
4256/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4257DECL_INLINE_THROW(uint32_t)
4258iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4259{
4260 Assert(iGReg < 16);
4261 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4262 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4263
4264 /* If we've delayed writing back the register value, flush it now. */
4265 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4266
4267 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4268 if (!fConst)
4269 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4270
4271 return off;
4272}
4273
4274
4275#undef IEM_MC_REF_EFLAGS /* should not be used. */
4276#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4277 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4278 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4279
4280/** Handles IEM_MC_REF_EFLAGS. */
4281DECL_INLINE_THROW(uint32_t)
4282iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4283{
4284 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4285 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4286
4287#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4288 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4289
4290 /* Updating the skipping according to the outputs is a little early, but
4291 we don't have any other hooks for references atm. */
4292 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4293 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4294 else if (fEflOutput & X86_EFL_STATUS_BITS)
4295 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4296 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4297#else
4298 RT_NOREF(fEflInput, fEflOutput);
4299#endif
4300
4301 /* If we've delayed writing back the register value, flush it now. */
4302 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4303
4304 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4305 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4306
4307 return off;
4308}
4309
4310
4311/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4312 * different code from the threaded recompiler, maybe it would be helpful. For now
4313 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
4314#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4315
4316
4317#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4318 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4319
4320#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4321 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4322
4323#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4324 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4325
4326#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4327/* Just being paranoid here. */
4328# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
4329AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4330AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4331AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4332AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4333# endif
4334AssertCompileMemberOffset(X86XMMREG, au64, 0);
4335AssertCompileMemberOffset(X86XMMREG, au32, 0);
4336AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4337AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4338
4339# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4340 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4341# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4342 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4343# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4344 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4345# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4346 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4347#endif
4348
4349/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4350DECL_INLINE_THROW(uint32_t)
4351iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4352{
4353 Assert(iXReg < 16);
4354 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4355 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4356
4357 /* If we've delayed writing back the register value, flush it now. */
4358 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4359
4360#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4361 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4362 if (!fConst)
4363 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4364#else
4365 RT_NOREF(fConst);
4366#endif
4367
4368 return off;
4369}
4370
4371
4372#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
4373 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
4374
4375/** Handles IEM_MC_REF_MXCSR. */
4376DECL_INLINE_THROW(uint32_t)
4377iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
4378{
4379 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
4380 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4381
4382 /* If we've delayed writing back the register value, flush it now. */
4383 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
4384
4385 /* If there is a shadow copy of guest MXCSR, flush it now. */
4386 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
4387
4388 return off;
4389}
4390
4391
4392
4393/*********************************************************************************************************************************
4394* Effective Address Calculation *
4395*********************************************************************************************************************************/
4396#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4397 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4398
4399/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4400 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4401DECL_INLINE_THROW(uint32_t)
4402iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4403 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4404{
4405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4406
4407 /*
4408 * Handle the disp16 form with no registers first.
4409 *
4410 * Convert to an immediate value, as that'll delay the register allocation
4411 * and assignment till the memory access / call / whatever and we can use
4412 * a more appropriate register (or none at all).
4413 */
4414 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4415 {
4416 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4417 return off;
4418 }
4419
4420 /* Determine the displacement. */
4421 uint16_t u16EffAddr;
4422 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4423 {
4424 case 0: u16EffAddr = 0; break;
4425 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4426 case 2: u16EffAddr = u16Disp; break;
4427 default: AssertFailedStmt(u16EffAddr = 0);
4428 }
4429
4430 /* Determine the registers involved. */
4431 uint8_t idxGstRegBase;
4432 uint8_t idxGstRegIndex;
4433 switch (bRm & X86_MODRM_RM_MASK)
4434 {
4435 case 0:
4436 idxGstRegBase = X86_GREG_xBX;
4437 idxGstRegIndex = X86_GREG_xSI;
4438 break;
4439 case 1:
4440 idxGstRegBase = X86_GREG_xBX;
4441 idxGstRegIndex = X86_GREG_xDI;
4442 break;
4443 case 2:
4444 idxGstRegBase = X86_GREG_xBP;
4445 idxGstRegIndex = X86_GREG_xSI;
4446 break;
4447 case 3:
4448 idxGstRegBase = X86_GREG_xBP;
4449 idxGstRegIndex = X86_GREG_xDI;
4450 break;
4451 case 4:
4452 idxGstRegBase = X86_GREG_xSI;
4453 idxGstRegIndex = UINT8_MAX;
4454 break;
4455 case 5:
4456 idxGstRegBase = X86_GREG_xDI;
4457 idxGstRegIndex = UINT8_MAX;
4458 break;
4459 case 6:
4460 idxGstRegBase = X86_GREG_xBP;
4461 idxGstRegIndex = UINT8_MAX;
4462 break;
4463#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4464 default:
4465#endif
4466 case 7:
4467 idxGstRegBase = X86_GREG_xBX;
4468 idxGstRegIndex = UINT8_MAX;
4469 break;
4470 }
4471
4472 /*
4473 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4474 */
4475 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4476 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4477 kIemNativeGstRegUse_ReadOnly);
4478 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4479 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4480 kIemNativeGstRegUse_ReadOnly)
4481 : UINT8_MAX;
4482#ifdef RT_ARCH_AMD64
4483 if (idxRegIndex == UINT8_MAX)
4484 {
4485 if (u16EffAddr == 0)
4486 {
4487 /* movzx ret, base */
4488 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4489 }
4490 else
4491 {
4492 /* lea ret32, [base64 + disp32] */
4493 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4494 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4495 if (idxRegRet >= 8 || idxRegBase >= 8)
4496 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4497 pbCodeBuf[off++] = 0x8d;
4498 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4499 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4500 else
4501 {
4502 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4503 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4504 }
4505 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4506 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4507 pbCodeBuf[off++] = 0;
4508 pbCodeBuf[off++] = 0;
4509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4510
4511 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4512 }
4513 }
4514 else
4515 {
4516 /* lea ret32, [index64 + base64 (+ disp32)] */
4517 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4518 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4519 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4520 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4521 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4522 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4523 pbCodeBuf[off++] = 0x8d;
4524 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4525 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4526 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4527 if (bMod == X86_MOD_MEM4)
4528 {
4529 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4530 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4531 pbCodeBuf[off++] = 0;
4532 pbCodeBuf[off++] = 0;
4533 }
4534 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4535 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4536 }
4537
4538#elif defined(RT_ARCH_ARM64)
4539 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4540 if (u16EffAddr == 0)
4541 {
4542 if (idxRegIndex == UINT8_MAX)
4543 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4544 else
4545 {
4546 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4547 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4548 }
4549 }
4550 else
4551 {
4552 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4553 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4554 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4555 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4556 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4557 else
4558 {
4559 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4560 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4561 }
4562 if (idxRegIndex != UINT8_MAX)
4563 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4564 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4565 }
4566
4567#else
4568# error "port me"
4569#endif
4570
4571 if (idxRegIndex != UINT8_MAX)
4572 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4573 iemNativeRegFreeTmp(pReNative, idxRegBase);
4574 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4575 return off;
4576}
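
/*
 * Worked example, not part of the emitter: bRm == 0x42 decodes as mod=1, rm=2,
 * i.e. [bp+si+disp8]. With u16Disp == 0x00f0 the disp8 is sign-extended to
 * u16EffAddr = 0xfff0 (-16), idxGstRegBase = X86_GREG_xBP and
 * idxGstRegIndex = X86_GREG_xSI, so the code emitted above computes
 * (uint16_t)(0xfff0 + BP + SI) into the result variable.
 */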
4577
4578
4579#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4580 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4581
4582/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4583 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4584DECL_INLINE_THROW(uint32_t)
4585iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4586 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4587{
4588 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4589
4590 /*
4591 * Handle the disp32 form with no registers first.
4592 *
4593 * Convert to an immediate value, as that'll delay the register allocation
4594 * and assignment till the memory access / call / whatever and we can use
4595 * a more appropriate register (or none at all).
4596 */
4597 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4598 {
4599 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4600 return off;
4601 }
4602
4603 /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
4604 uint32_t u32EffAddr = 0;
4605 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4606 {
4607 case 0: break;
4608 case 1: u32EffAddr = (int8_t)u32Disp; break;
4609 case 2: u32EffAddr = u32Disp; break;
4610 default: AssertFailed();
4611 }
4612
4613 /* Get the register (or SIB) value. */
4614 uint8_t idxGstRegBase = UINT8_MAX;
4615 uint8_t idxGstRegIndex = UINT8_MAX;
4616 uint8_t cShiftIndex = 0;
4617 switch (bRm & X86_MODRM_RM_MASK)
4618 {
4619 case 0: idxGstRegBase = X86_GREG_xAX; break;
4620 case 1: idxGstRegBase = X86_GREG_xCX; break;
4621 case 2: idxGstRegBase = X86_GREG_xDX; break;
4622 case 3: idxGstRegBase = X86_GREG_xBX; break;
4623 case 4: /* SIB */
4624 {
4625 /* index w/ scaling. */
4626 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4627 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4628 {
4629 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4630 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4631 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4632 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4633 case 4: cShiftIndex = 0; /*no index*/ break;
4634 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4635 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4636 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4637 }
4638
4639 /* base */
4640 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4641 {
4642 case 0: idxGstRegBase = X86_GREG_xAX; break;
4643 case 1: idxGstRegBase = X86_GREG_xCX; break;
4644 case 2: idxGstRegBase = X86_GREG_xDX; break;
4645 case 3: idxGstRegBase = X86_GREG_xBX; break;
4646 case 4:
4647 idxGstRegBase = X86_GREG_xSP;
4648 u32EffAddr += uSibAndRspOffset >> 8;
4649 break;
4650 case 5:
4651 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4652 idxGstRegBase = X86_GREG_xBP;
4653 else
4654 {
4655 Assert(u32EffAddr == 0);
4656 u32EffAddr = u32Disp;
4657 }
4658 break;
4659 case 6: idxGstRegBase = X86_GREG_xSI; break;
4660 case 7: idxGstRegBase = X86_GREG_xDI; break;
4661 }
4662 break;
4663 }
4664 case 5: idxGstRegBase = X86_GREG_xBP; break;
4665 case 6: idxGstRegBase = X86_GREG_xSI; break;
4666 case 7: idxGstRegBase = X86_GREG_xDI; break;
4667 }
4668
4669 /*
4670 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4671 * the start of the function.
4672 */
4673 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4674 {
4675 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4676 return off;
4677 }
4678
4679 /*
4680 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4681 */
4682 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4683 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4684 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4685 kIemNativeGstRegUse_ReadOnly);
4686 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4687 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4688 kIemNativeGstRegUse_ReadOnly);
4689
4690 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4691 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4692 {
4693 idxRegBase = idxRegIndex;
4694 idxRegIndex = UINT8_MAX;
4695 }
4696
4697#ifdef RT_ARCH_AMD64
4698 if (idxRegIndex == UINT8_MAX)
4699 {
4700 if (u32EffAddr == 0)
4701 {
4702 /* mov ret, base */
4703 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4704 }
4705 else
4706 {
4707 /* lea ret32, [base64 + disp32] */
4708 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4709 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4710 if (idxRegRet >= 8 || idxRegBase >= 8)
4711 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4712 pbCodeBuf[off++] = 0x8d;
4713 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4714 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4715 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4716 else
4717 {
4718 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4719 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4720 }
4721 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4722 if (bMod == X86_MOD_MEM4)
4723 {
4724 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4725 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4726 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4727 }
4728 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4729 }
4730 }
4731 else
4732 {
4733 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4734 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4735 if (idxRegBase == UINT8_MAX)
4736 {
4737 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4738 if (idxRegRet >= 8 || idxRegIndex >= 8)
4739 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4740 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4741 pbCodeBuf[off++] = 0x8d;
4742 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4743 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4744 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4745 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4746 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4747 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4748 }
4749 else
4750 {
4751 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4752 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4753 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4754 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4755 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4756 pbCodeBuf[off++] = 0x8d;
4757 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4758 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4759 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4760 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4761 if (bMod != X86_MOD_MEM0)
4762 {
4763 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4764 if (bMod == X86_MOD_MEM4)
4765 {
4766 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4767 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4768 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4769 }
4770 }
4771 }
4772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4773 }
4774
4775#elif defined(RT_ARCH_ARM64)
4776 if (u32EffAddr == 0)
4777 {
4778 if (idxRegIndex == UINT8_MAX)
4779 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4780 else if (idxRegBase == UINT8_MAX)
4781 {
4782 if (cShiftIndex == 0)
4783 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4784 else
4785 {
4786 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4787 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4788 }
4789 }
4790 else
4791 {
4792 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4793 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4794 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4795 }
4796 }
4797 else
4798 {
4799 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4800 {
4801 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4802 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4803 }
4804 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4805 {
4806 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4807 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4808 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4809 }
4810 else
4811 {
4812 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4813 if (idxRegBase != UINT8_MAX)
4814 {
4815 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4816 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4817 }
4818 }
4819 if (idxRegIndex != UINT8_MAX)
4820 {
4821 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4822 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4823 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4824 }
4825 }
4826
4827#else
4828# error "port me"
4829#endif
4830
4831 if (idxRegIndex != UINT8_MAX)
4832 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4833 if (idxRegBase != UINT8_MAX)
4834 iemNativeRegFreeTmp(pReNative, idxRegBase);
4835 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4836 return off;
4837}
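
/*
 * Worked example, not part of the emitter: with rm=4 a SIB byte follows in the
 * low 8 bits of uSibAndRspOffset. For SIB == 0x88 (scale field 2, index 1,
 * base 0) and mod=2 with u32Disp == 0x100, the decode above yields
 * idxGstRegBase = X86_GREG_xAX, idxGstRegIndex = X86_GREG_xCX, cShiftIndex = 2
 * and u32EffAddr = 0x100, i.e. (uint32_t)(0x100 + EAX + (ECX << 2)). When
 * SIB.base=4 (ESP) the second byte of uSibAndRspOffset is added on top to
 * account for the 'pop [esp]' adjustment.
 */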
4838
4839
4840#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4841 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4842 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4843
4844#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4845 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4846 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4847
4848#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4849 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4850 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4851
4852/**
4853 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4854 *
4855 * @returns New off.
4856 * @param pReNative The native recompiler state.
4857 * @param off The current instruction buffer offset.
4858 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4859 * bit 4 to REX.X. The two bits are part of the
4860 * REG sub-field, which isn't needed in this
4861 * function.
4862 * @param uSibAndRspOffset Two parts:
4863 * - The first 8 bits make up the SIB byte.
4864 * - The next 8 bits are the fixed RSP/ESP offset
4865 * in case of a pop [xSP].
4866 * @param u32Disp The displacement byte/word/dword, if any.
4867 * @param cbInstr The size of the fully decoded instruction. Used
4868 * for RIP relative addressing.
4869 * @param idxVarRet The result variable number.
4870 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4871 * when calculating the address.
4872 *
4873 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4874 */
4875DECL_INLINE_THROW(uint32_t)
4876iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4877 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4878{
4879 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4880
4881 /*
4882 * Special case the rip + disp32 form first.
4883 */
4884 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4885 {
4886#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4887 /* Need to take the current PC offset into account for the displacement; no need to flush here
4888 * as the PC is only accessed read-only and no branching or helper calls are involved. */
4889 u32Disp += pReNative->Core.offPc;
4890#endif
4891
4892 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4893 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4894 kIemNativeGstRegUse_ReadOnly);
4895#ifdef RT_ARCH_AMD64
4896 if (f64Bit)
4897 {
4898 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4899 if ((int32_t)offFinalDisp == offFinalDisp)
4900 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4901 else
4902 {
4903 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4904 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4905 }
4906 }
4907 else
4908 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4909
4910#elif defined(RT_ARCH_ARM64)
4911 if (f64Bit)
4912 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4913 (int64_t)(int32_t)u32Disp + cbInstr);
4914 else
4915 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4916 (int32_t)u32Disp + cbInstr);
4917
4918#else
4919# error "Port me!"
4920#endif
4921 iemNativeRegFreeTmp(pReNative, idxRegPc);
4922 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4923 return off;
4924 }
4925
4926 /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
4927 int64_t i64EffAddr = 0;
4928 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4929 {
4930 case 0: break;
4931 case 1: i64EffAddr = (int8_t)u32Disp; break;
4932 case 2: i64EffAddr = (int32_t)u32Disp; break;
4933 default: AssertFailed();
4934 }
4935
4936 /* Get the register (or SIB) value. */
4937 uint8_t idxGstRegBase = UINT8_MAX;
4938 uint8_t idxGstRegIndex = UINT8_MAX;
4939 uint8_t cShiftIndex = 0;
4940 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4941 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4942 else /* SIB: */
4943 {
4944 /* index w/ scaling. */
4945 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4946 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4947 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4948 if (idxGstRegIndex == 4)
4949 {
4950 /* no index */
4951 cShiftIndex = 0;
4952 idxGstRegIndex = UINT8_MAX;
4953 }
4954
4955 /* base */
4956 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4957 if (idxGstRegBase == 4)
4958 {
4959 /* pop [rsp] hack */
4960 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4961 }
4962 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4963 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4964 {
4965 /* mod=0 and base=5 -> disp32, no base reg. */
4966 Assert(i64EffAddr == 0);
4967 i64EffAddr = (int32_t)u32Disp;
4968 idxGstRegBase = UINT8_MAX;
4969 }
4970 }
4971
4972 /*
4973 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4974 * the start of the function.
4975 */
4976 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4977 {
4978 if (f64Bit)
4979 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4980 else
4981 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4982 return off;
4983 }
4984
4985 /*
4986 * Now emit code that calculates:
4987 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4988 * or if !f64Bit:
4989 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4990 */
4991 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4992 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4993 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4994 kIemNativeGstRegUse_ReadOnly);
4995 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4996 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4997 kIemNativeGstRegUse_ReadOnly);
4998
4999 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5000 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5001 {
5002 idxRegBase = idxRegIndex;
5003 idxRegIndex = UINT8_MAX;
5004 }
5005
5006#ifdef RT_ARCH_AMD64
5007 uint8_t bFinalAdj;
5008 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
5009 bFinalAdj = 0; /* likely */
5010 else
5011 {
5012 /* pop [rsp] with a problematic disp32 value. Split out the
5013 RSP offset and add it separately afterwards (bFinalAdj). */
5014 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
5015 Assert(idxGstRegBase == X86_GREG_xSP);
5016 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
5017 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
5018 Assert(bFinalAdj != 0);
5019 i64EffAddr -= bFinalAdj;
5020 Assert((int32_t)i64EffAddr == i64EffAddr);
5021 }
5022 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
5023//pReNative->pInstrBuf[off++] = 0xcc;
5024
5025 if (idxRegIndex == UINT8_MAX)
5026 {
5027 if (u32EffAddr == 0)
5028 {
5029 /* mov ret, base */
5030 if (f64Bit)
5031 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
5032 else
5033 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5034 }
5035 else
5036 {
5037 /* lea ret, [base + disp32] */
5038 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5039 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5040 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
5041 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5042 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5043 | (f64Bit ? X86_OP_REX_W : 0);
5044 pbCodeBuf[off++] = 0x8d;
5045 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5046 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5047 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5048 else
5049 {
5050 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5051 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5052 }
5053 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5054 if (bMod == X86_MOD_MEM4)
5055 {
5056 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5057 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5058 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5059 }
5060 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5061 }
5062 }
5063 else
5064 {
5065 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5066 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5067 if (idxRegBase == UINT8_MAX)
5068 {
5069 /* lea ret, [(index64 << cShiftIndex) + disp32] */
5070 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
5071 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5072 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5073 | (f64Bit ? X86_OP_REX_W : 0);
5074 pbCodeBuf[off++] = 0x8d;
5075 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5076 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5077 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5078 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5079 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5080 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5081 }
5082 else
5083 {
5084 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5085 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5086 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5087 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5088 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5089 | (f64Bit ? X86_OP_REX_W : 0);
5090 pbCodeBuf[off++] = 0x8d;
5091 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5092 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5093 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5094 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5095 if (bMod != X86_MOD_MEM0)
5096 {
5097 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5098 if (bMod == X86_MOD_MEM4)
5099 {
5100 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5101 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5102 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5103 }
5104 }
5105 }
5106 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5107 }
5108
5109 if (!bFinalAdj)
5110 { /* likely */ }
5111 else
5112 {
5113 Assert(f64Bit);
5114 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
5115 }
5116
5117#elif defined(RT_ARCH_ARM64)
5118 if (i64EffAddr == 0)
5119 {
5120 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5121 if (idxRegIndex == UINT8_MAX)
5122 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
5123 else if (idxRegBase != UINT8_MAX)
5124 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5125 f64Bit, false /*fSetFlags*/, cShiftIndex);
5126 else
5127 {
5128 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
5129 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
5130 }
5131 }
5132 else
5133 {
5134 if (f64Bit)
5135 { /* likely */ }
5136 else
5137 i64EffAddr = (int32_t)i64EffAddr;
5138
5139 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
5140 {
5141 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5142 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
5143 }
5144 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
5145 {
5146 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5147 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5148 }
5149 else
5150 {
5151 if (f64Bit)
5152 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5153 else
5154 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5155 if (idxRegBase != UINT8_MAX)
5156 {
5157 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5158 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5159 }
5160 }
5161 if (idxRegIndex != UINT8_MAX)
5162 {
5163 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5164 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5165 f64Bit, false /*fSetFlags*/, cShiftIndex);
5166 }
5167 }
5168
5169#else
5170# error "port me"
5171#endif
5172
5173 if (idxRegIndex != UINT8_MAX)
5174 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5175 if (idxRegBase != UINT8_MAX)
5176 iemNativeRegFreeTmp(pReNative, idxRegBase);
5177 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5178 return off;
5179}
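/* Worked example (illustrative only, not emitted verbatim anywhere above): with
 * idxRegRet=RAX, idxRegBase=RBX, idxRegIndex=RSI, cShiftIndex=2, a displacement of
 * 0x40 and f64Bit set, the AMD64 path picks bMod=X86_MOD_MEM1 (the displacement fits
 * in a signed byte) and emits 48 8D 44 B3 40, i.e. lea rax, [rbx + rsi*4 + 0x40]. */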
5180
5181
5182/*********************************************************************************************************************************
5183* Memory fetches and stores common *
5184*********************************************************************************************************************************/
5185
5186typedef enum IEMNATIVEMITMEMOP
5187{
5188 kIemNativeEmitMemOp_Store = 0,
5189 kIemNativeEmitMemOp_Fetch,
5190 kIemNativeEmitMemOp_Fetch_Zx_U16,
5191 kIemNativeEmitMemOp_Fetch_Zx_U32,
5192 kIemNativeEmitMemOp_Fetch_Zx_U64,
5193 kIemNativeEmitMemOp_Fetch_Sx_U16,
5194 kIemNativeEmitMemOp_Fetch_Sx_U32,
5195 kIemNativeEmitMemOp_Fetch_Sx_U64
5196} IEMNATIVEMITMEMOP;
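/* Note: the _Zx_/_Sx_ variants only differ in how the fetched value is widened on the
 * TLB-hit path and in which helper services the TLB-miss path; e.g. Fetch_Sx_U64 with
 * cbMem == 4 ends up as a 32-bit load sign-extended to 64 bits (see the switch further down). */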
5197
5198/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5199 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5200 * (with iSegReg = UINT8_MAX). */
5201DECL_INLINE_THROW(uint32_t)
5202iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5203 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5204 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5205{
5206 /*
5207 * Assert sanity.
5208 */
5209 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5210 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5211 Assert( enmOp != kIemNativeEmitMemOp_Store
5212 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5213 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5214 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5215 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5216 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5217 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5218 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5219 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5220#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5221 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5222 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5223#else
5224 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5225#endif
5226 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5227#ifdef VBOX_STRICT
5228 if (iSegReg == UINT8_MAX)
5229 {
5230 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5231 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5232 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5233 switch (cbMem)
5234 {
5235 case 1:
5236 Assert( pfnFunction
5237 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5238 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5239 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5240 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5241 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5242 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5243 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5244 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5245 : UINT64_C(0xc000b000a0009000) ));
5246 break;
5247 case 2:
5248 Assert( pfnFunction
5249 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5250 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5251 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5252 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5253 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5254 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5255 : UINT64_C(0xc000b000a0009000) ));
5256 break;
5257 case 4:
5258 Assert( pfnFunction
5259 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5260 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5261 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5262 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5263 : UINT64_C(0xc000b000a0009000) ));
5264 break;
5265 case 8:
5266 Assert( pfnFunction
5267 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5268 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5269 : UINT64_C(0xc000b000a0009000) ));
5270 break;
5271#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5272 case sizeof(RTUINT128U):
5273 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5274 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5275 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5276 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5277 || ( enmOp == kIemNativeEmitMemOp_Store
5278 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5279 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5280 break;
5281 case sizeof(RTUINT256U):
5282 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5283 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5284 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5285 || ( enmOp == kIemNativeEmitMemOp_Store
5286 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5287 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5288 break;
5289#endif
5290 }
5291 }
5292 else
5293 {
5294 Assert(iSegReg < 6);
5295 switch (cbMem)
5296 {
5297 case 1:
5298 Assert( pfnFunction
5299 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5300 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5301 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5302 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5303 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5304 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5305 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5306 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5307 : UINT64_C(0xc000b000a0009000) ));
5308 break;
5309 case 2:
5310 Assert( pfnFunction
5311 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5312 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5313 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5314 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5315 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5316 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5317 : UINT64_C(0xc000b000a0009000) ));
5318 break;
5319 case 4:
5320 Assert( pfnFunction
5321 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5322 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5323 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5324 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5325 : UINT64_C(0xc000b000a0009000) ));
5326 break;
5327 case 8:
5328 Assert( pfnFunction
5329 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5330 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5331 : UINT64_C(0xc000b000a0009000) ));
5332 break;
5333#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5334 case sizeof(RTUINT128U):
5335 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5336 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5337 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5338 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5339 || ( enmOp == kIemNativeEmitMemOp_Store
5340 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5341 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5342 break;
5343 case sizeof(RTUINT256U):
5344 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5345 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5346 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5347 || ( enmOp == kIemNativeEmitMemOp_Store
5348 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5349 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5350 break;
5351#endif
5352 }
5353 }
5354#endif
5355
5356#ifdef VBOX_STRICT
5357 /*
5358 * Check that the fExec flags we've got make sense.
5359 */
5360 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5361#endif
5362
5363 /*
5364 * To keep things simple we have to commit any pending writes first as we
5365 * may end up making calls.
5366 */
5367 /** @todo we could postpone this till we make the call and reload the
5368 * registers after returning from the call. Not sure if that's sensible or
5369 * not, though. */
5370#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5371 off = iemNativeRegFlushPendingWrites(pReNative, off);
5372#else
5373 /* The program counter is treated differently for now. */
5374 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5375#endif
5376
5377#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5378 /*
5379 * Move/spill/flush stuff out of call-volatile registers.
5380 * This is the easy way out. We could contain this to the tlb-miss branch
5381 * by saving and restoring active stuff here.
5382 */
5383 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5384#endif
5385
5386 /*
5387 * Define labels and allocate the result register (trying for the return
5388 * register if we can).
5389 */
5390 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5391#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5392 uint8_t idxRegValueFetch = UINT8_MAX;
5393
5394 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5395 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5396 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5397 else
5398 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5399 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5400 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5401 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5402#else
5403 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5404 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5405 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5406 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5407#endif
5408 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
5409
5410#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5411 uint8_t idxRegValueStore = UINT8_MAX;
5412
5413 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5414 idxRegValueStore = !TlbState.fSkip
5415 && enmOp == kIemNativeEmitMemOp_Store
5416 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5417 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5418 : UINT8_MAX;
5419 else
5420 idxRegValueStore = !TlbState.fSkip
5421 && enmOp == kIemNativeEmitMemOp_Store
5422 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5423 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5424 : UINT8_MAX;
5425
5426#else
5427 uint8_t const idxRegValueStore = !TlbState.fSkip
5428 && enmOp == kIemNativeEmitMemOp_Store
5429 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5430 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5431 : UINT8_MAX;
5432#endif
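 /* Note: immediate stores intentionally get no value register here; the TlbLookup path
    further down emits the immediate directly via iemNativeEmitStoreImm8/16/32/64ByGprEx. */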
5433 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5434 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5435 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5436 : UINT32_MAX;
5437
5438 /*
5439 * Jump to the TLB lookup code.
5440 */
5441 if (!TlbState.fSkip)
5442 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5443
5444 /*
5445 * TlbMiss:
5446 *
5447 * Call helper to do the fetching.
5448 * We flush all guest register shadow copies here.
5449 */
5450 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5451
5452#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5453 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5454#else
5455 RT_NOREF(idxInstr);
5456#endif
5457
5458#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5459 if (pReNative->Core.offPc)
5460 {
5461 /*
5462 * Update the program counter but restore it at the end of the TlbMiss branch.
5463 * This should allow delaying more program counter updates for the TlbLookup and hit paths
5464 * which are hopefully much more frequent, reducing the amount of memory accesses.
5465 */
5466 /* Allocate a temporary PC register. */
5467 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5468
5469 /* Perform the addition and store the result. */
5470 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5471 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5472
5473 /* Free and flush the PC register. */
5474 iemNativeRegFreeTmp(pReNative, idxPcReg);
5475 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5476 }
5477#endif
5478
5479#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5480 /* Save variables in volatile registers. */
5481 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5482 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5483 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5484 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5485#endif
5486
5487 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5488 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5489#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5490 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5491 {
5492 /*
5493 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5494 *
5495 * @note A register was assigned to the variable for the TlbLookup case above; it must not be
5496 * freed here, or the value loaded into that register won't be synced with the stack slot
5497 * further down the road because the variable would no longer know it has a register assigned.
5498 *
5499 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5500 * as it will be overwritten anyway.
5501 */
5502 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5503 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5504 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5505 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5506 }
5507 else
5508#endif
5509 if (enmOp == kIemNativeEmitMemOp_Store)
5510 {
5511 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5512 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5513#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5514 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5515#else
5516 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
5517 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5518#endif
5519 }
5520
5521 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5522 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5523#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5524 fVolGregMask);
5525#else
5526 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
5527#endif
5528
5529 if (iSegReg != UINT8_MAX)
5530 {
5531 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5532 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5533 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5534 }
5535
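 /* Recap of the helper calling convention used here: ARG0 = pVCpu, ARG1 = guest address
    (with offDisp already folded in), ARG2 = iSegReg for segmented accesses, the value to
    store in ARG2 (flat) or ARG3 (segmented); fetch results come back in IEMNATIVE_CALL_RET_GREG. */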
5536 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5537 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5538
5539 /* Done setting up parameters, make the call. */
5540 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5541
5542 /*
5543 * Put the result in the right register if this is a fetch.
5544 */
5545 if (enmOp != kIemNativeEmitMemOp_Store)
5546 {
5547#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5548 if ( cbMem == sizeof(RTUINT128U)
5549 || cbMem == sizeof(RTUINT256U))
5550 {
5551 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5552
5553 /* Sync the value on the stack with the host register assigned to the variable. */
5554 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5555 }
5556 else
5557#endif
5558 {
5559 Assert(idxRegValueFetch == pVarValue->idxReg);
5560 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5561 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5562 }
5563 }
5564
5565#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5566 /* Restore variables and guest shadow registers to volatile registers. */
5567 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5568 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5569#endif
5570
5571#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5572 if (pReNative->Core.offPc)
5573 {
5574 /*
5575 * Time to restore the program counter to its original value.
5576 */
5577 /* Allocate a temporary PC register. */
5578 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5579
5580 /* Restore the original value. */
5581 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5582 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5583
5584 /* Free and flush the PC register. */
5585 iemNativeRegFreeTmp(pReNative, idxPcReg);
5586 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5587 }
5588#endif
5589
5590#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5591 if (!TlbState.fSkip)
5592 {
5593 /* end of TlbMiss - Jump to the done label. */
5594 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5595 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5596
5597 /*
5598 * TlbLookup:
5599 */
5600 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5601 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5602 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5603
5604 /*
5605 * Emit code to do the actual storing / fetching.
5606 */
5607 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5608# ifdef VBOX_WITH_STATISTICS
5609 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5610 enmOp == kIemNativeEmitMemOp_Store
5611 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5612 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5613# endif
5614 switch (enmOp)
5615 {
5616 case kIemNativeEmitMemOp_Store:
5617 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5618 {
5619 switch (cbMem)
5620 {
5621 case 1:
5622 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5623 break;
5624 case 2:
5625 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5626 break;
5627 case 4:
5628 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5629 break;
5630 case 8:
5631 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5632 break;
5633#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5634 case sizeof(RTUINT128U):
5635 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5636 break;
5637 case sizeof(RTUINT256U):
5638 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5639 break;
5640#endif
5641 default:
5642 AssertFailed();
5643 }
5644 }
5645 else
5646 {
5647 switch (cbMem)
5648 {
5649 case 1:
5650 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5651 idxRegMemResult, TlbState.idxReg1);
5652 break;
5653 case 2:
5654 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5655 idxRegMemResult, TlbState.idxReg1);
5656 break;
5657 case 4:
5658 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5659 idxRegMemResult, TlbState.idxReg1);
5660 break;
5661 case 8:
5662 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5663 idxRegMemResult, TlbState.idxReg1);
5664 break;
5665 default:
5666 AssertFailed();
5667 }
5668 }
5669 break;
5670
5671 case kIemNativeEmitMemOp_Fetch:
5672 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5673 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5674 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5675 switch (cbMem)
5676 {
5677 case 1:
5678 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5679 break;
5680 case 2:
5681 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5682 break;
5683 case 4:
5684 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5685 break;
5686 case 8:
5687 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5688 break;
5689#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5690 case sizeof(RTUINT128U):
5691 /*
5692 * No need to sync back the register with the stack, this is done by the generic variable handling
5693 * code if there is a register assigned to a variable and the stack must be accessed.
5694 */
5695 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5696 break;
5697 case sizeof(RTUINT256U):
5698 /*
5699 * No need to sync back the register with the stack, this is done by the generic variable handling
5700 * code if there is a register assigned to a variable and the stack must be accessed.
5701 */
5702 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5703 break;
5704#endif
5705 default:
5706 AssertFailed();
5707 }
5708 break;
5709
5710 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5711 Assert(cbMem == 1);
5712 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5713 break;
5714
5715 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5716 Assert(cbMem == 1 || cbMem == 2);
5717 if (cbMem == 1)
5718 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5719 else
5720 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5721 break;
5722
5723 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5724 switch (cbMem)
5725 {
5726 case 1:
5727 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5728 break;
5729 case 2:
5730 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5731 break;
5732 case 4:
5733 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5734 break;
5735 default:
5736 AssertFailed();
5737 }
5738 break;
5739
5740 default:
5741 AssertFailed();
5742 }
5743
5744 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5745
5746 /*
5747 * TlbDone:
5748 */
5749 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5750
5751 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5752
5753# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5754 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5755 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5756# endif
5757 }
5758#else
5759 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5760#endif
5761
5762 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5763 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5764 return off;
5765}
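/* Rough shape of the code emitted by iemNativeEmitMemFetchStoreDataCommon when the TLB
 * lookup isn't skipped (sketch only):
 *      jmp     TlbLookup
 *  TlbMiss:
 *      <save volatiles / set up arguments / call pfnFunction / restore>
 *      jmp     TlbDone
 *  TlbLookup:
 *      <inline TLB probe, branching back to TlbMiss on failure>
 *      <inline load/store through idxRegMemResult>
 *  TlbDone:
 */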
5766
5767
5768
5769/*********************************************************************************************************************************
5770* Memory fetches (IEM_MEM_FETCH_XXX). *
5771*********************************************************************************************************************************/
5772
5773/* 8-bit segmented: */
5774#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5775 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5776 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5777 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5778
5779#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5780 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5781 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5782 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5783
5784#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5785 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5786 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5787 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5788
5789#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5790 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5791 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5792 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5793
5794#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5795 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5796 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5797 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5798
5799#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5800 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5801 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5802 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5803
5804#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5805 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5806 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5807 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
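/* Usage sketch (hypothetical local names u64Tmp/GCPtrEffSrc, for illustration only):
 *      IEM_MC_FETCH_MEM_U8_SX_U64(u64Tmp, X86_SREG_DS, GCPtrEffSrc);
 * boils down to one iemNativeEmitMemFetchStoreDataCommon() call with cbMem=1, fAlignMask=0,
 * kIemNativeEmitMemOp_Fetch_Sx_U64 and iemNativeHlpMemFetchDataU8_Sx_U64 as the TLB-miss helper. */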
5808
5809/* 16-bit segmented: */
5810#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5811 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5812 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5813 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5814
5815#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5816 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5817 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5818 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5819
5820#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5821 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5822 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5823 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5824
5825#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5826 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5827 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5828 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5829
5830#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5831 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5832 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5833 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5834
5835#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5836 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5837 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5838 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5839
5840
5841/* 32-bit segmented: */
5842#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5843 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5844 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5845 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5846
5847#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5848 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5849 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5850 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5851
5852#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5853 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5854 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5855 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5856
5857#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5858 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5859 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5860 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5861
5862#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
5863 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
5864 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5865 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5866
5867#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
5868 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
5869 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5870 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5871
5872#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
5873 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
5874 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5875 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5876
5877AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5878#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5879 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5880 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5881 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5882
5883
5884/* 64-bit segmented: */
5885#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5886 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5887 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5888 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5889
5890AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5891#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5893 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5894 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5895
5896
5897/* 8-bit flat: */
5898#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5900 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5901 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5902
5903#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5904 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5905 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5906 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5907
5908#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5909 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5910 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5911 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5912
5913#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5915 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5916 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5917
5918#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5919 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5920 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5921 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5922
5923#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5924 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5925 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5926 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5927
5928#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5929 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5930 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5931 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5932
5933
5934/* 16-bit flat: */
5935#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5936 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5937 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5938 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5939
5940#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5942 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5943 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5944
5945#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5946 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5947 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5948 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5949
5950#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5951 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5952 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5953 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5954
5955#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5956 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5957 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5958 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5959
5960#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5961 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5962 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5963 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5964
5965/* 32-bit flat: */
5966#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5968 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5969 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5970
5971#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5972 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5973 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5974 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5975
5976#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5977 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5978 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5979 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5980
5981#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5982 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5983 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5984 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5985
5986#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
5987 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
5988 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5989 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5990
5991#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
5992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
5993 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5994 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5995
5996#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
5997 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
5998 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5999 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6000
6001#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
6002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
6003 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6004 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6005
6006
6007/* 64-bit flat: */
6008#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
6009 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6010 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6011 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6012
6013#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
6014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
6015 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6016 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6017
6018#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6019/* 128-bit segmented: */
6020#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
6021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6022 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6023 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
6024
6025#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
6026 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6027 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6028 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6029
6030AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
6031#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
6032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
6033 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6034 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6035
6036#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
6037 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6038 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6039 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
6040
6041/* 128-bit flat: */
6042#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
6043 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6044 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6045 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
6046
6047#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
6048 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6049 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6050 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6051
6052#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
6053 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
6054 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6055 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6056
6057#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
6058 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6059 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6060 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
6061
6062/* 256-bit segmented: */
6063#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
6064 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6065 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6066 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6067
6068#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
6069 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6070 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6071 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6072
6073#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
6074 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6075 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6076 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6077
6078
6079/* 256-bit flat: */
6080#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
6081 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6082 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6083 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6084
6085#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
6086 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6087 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6088 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6089
6090#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
6091 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6092 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6093 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6094#endif
6095
6096
6097/*********************************************************************************************************************************
6098* Memory stores (IEM_MEM_STORE_XXX). *
6099*********************************************************************************************************************************/
6100
6101#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
6102 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
6103 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6104 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6105
6106#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
6107 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
6108 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6109 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6110
6111#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
6112 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
6113 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6114 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6115
6116#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
6117 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
6118 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6119 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6120
6121
6122#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
6123 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
6124 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6125 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6126
6127#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
6128 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
6129 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6130 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6131
6132#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
6133 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
6134 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6135 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6136
6137#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
6138 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
6139 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6140 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6141
6142
6143#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
6144 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6145 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6146
6147#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
6148 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6149 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6150
6151#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
6152 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6153 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6154
6155#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
6156 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6157 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6158
6159
6160#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
6161 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6162 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6163
6164#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
6165 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6166 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6167
6168#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
6169 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6170 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6171
6172#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
6173 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6174 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6175
6176/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6177 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6178DECL_INLINE_THROW(uint32_t)
6179iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6180 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6181{
6182 /*
6183 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6184 * to do the grunt work.
6185 */
6186 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6187 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6188 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6189 pfnFunction, idxInstr);
6190 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6191 return off;
6192}
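/* Example (illustrative only, GCPtrEffDst being a hypothetical address variable):
 *      IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrEffDst, 0xffff)
 * allocates a 2-byte const variable holding 0xffff, routes it through the common emitter
 * above as a store with an alignment mask of 1, and then frees the temporary variable. */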
6193
6194
6195#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6196# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6197 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6198 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6199 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6200
6201# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6202 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6203 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6204 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6205
6206# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6207 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6208 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6209 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6210
6211# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6212 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6213 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6214 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6215
6216
6217# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6218 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6219 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6220 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6221
6222# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6223 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6224 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6225 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6226
6227# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6228 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6229 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6230 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6231
6232# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6233 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6234 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6235 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6236#endif
6237
6238
6239
6240/*********************************************************************************************************************************
6241* Stack Accesses. *
6242*********************************************************************************************************************************/
6243/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
6244#define IEM_MC_PUSH_U16(a_u16Value) \
6245 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6246 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6247#define IEM_MC_PUSH_U32(a_u32Value) \
6248 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6249 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6250#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6251 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6252 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6253#define IEM_MC_PUSH_U64(a_u64Value) \
6254 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6255 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6256
6257#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6258 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6259 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6260#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6261 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6262 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6263#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6264 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6265 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6266
6267#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6268 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6269 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6270#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6271 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6272 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
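/* Note on the cBitsVarAndFlat encoding used by these macros: RT_BYTE1() is the
   width of the value being pushed, RT_BYTE2() the flat stack width (0 for the
   non-flat modes) and RT_BYTE3() the segment-register flag. For example,
   IEM_MC_FLAT64_PUSH_U16 encodes RT_MAKE_U32_FROM_U8(16, 64, 0, 0): a 16-bit
   value pushed using a 64-bit flat stack pointer. */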
6273
6274
6275DECL_FORCE_INLINE_THROW(uint32_t)
6276iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6277{
6278 /* Use16BitSp: */
6279#ifdef RT_ARCH_AMD64
6280 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6281 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6282#else
6283 /* sub regeff, regrsp, #cbMem */
6284 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6285 /* and regeff, regeff, #0xffff */
6286 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6287 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6288 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
6289 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6290#endif
6291 return off;
6292}
6293
6294
6295DECL_FORCE_INLINE(uint32_t)
6296iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6297{
6298 /* Use32BitSp: */
6299 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6300 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6301 return off;
6302}
6303
6304
6305/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6306DECL_INLINE_THROW(uint32_t)
6307iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6308 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6309{
6310 /*
6311 * Assert sanity.
6312 */
6313 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6314 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6315#ifdef VBOX_STRICT
6316 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6317 {
6318 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6319 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6320 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6321 Assert( pfnFunction
6322 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6323 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6324 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6325 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6326 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6327 : UINT64_C(0xc000b000a0009000) ));
6328 }
6329 else
6330 Assert( pfnFunction
6331 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6332 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6333 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6334 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6335 : UINT64_C(0xc000b000a0009000) ));
6336#endif
6337
6338#ifdef VBOX_STRICT
6339 /*
6340 * Check that the fExec flags we've got make sense.
6341 */
6342 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6343#endif
6344
6345 /*
6346 * To keep things simple we have to commit any pending writes first as we
6347 * may end up making calls.
6348 */
6349 /** @todo we could postpone this till we make the call and reload the
6350 * registers after returning from the call. Not sure if that's sensible or
6351 * not, though. */
6352 off = iemNativeRegFlushPendingWrites(pReNative, off);
6353
6354 /*
6355 * First we calculate the new RSP and the effective stack pointer value.
6356 * For 64-bit mode and flat 32-bit these two are the same.
6357 * (Code structure is very similar to that of POP.)
6358 */
6359 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6360 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6361 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
6362 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6363 ? cbMem : sizeof(uint16_t);
6364 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6365 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6366 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6367 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6368 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6369 if (cBitsFlat != 0)
6370 {
6371 Assert(idxRegEffSp == idxRegRsp);
6372 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6373 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6374 if (cBitsFlat == 64)
6375 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6376 else
6377 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6378 }
6379 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6380 {
6381 Assert(idxRegEffSp != idxRegRsp);
6382 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6383 kIemNativeGstRegUse_ReadOnly);
6384#ifdef RT_ARCH_AMD64
6385 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6386#else
6387 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6388#endif
6389 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6390 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6391 offFixupJumpToUseOtherBitSp = off;
6392 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6393 {
6394 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6395 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6396 }
6397 else
6398 {
6399 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6400 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6401 }
6402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6403 }
6404 /* SpUpdateEnd: */
6405 uint32_t const offLabelSpUpdateEnd = off;
6406
6407 /*
6408 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
6409 * TlbMiss if we're skipping the lookup).
6410 */
6411 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6412 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6413 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6414 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6415 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6416 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6417 : UINT32_MAX;
6418 uint8_t const idxRegValue = !TlbState.fSkip
6419 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6420 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6421 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6422 : UINT8_MAX;
6423 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6424
6425
6426 if (!TlbState.fSkip)
6427 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6428 else
6429 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6430
6431 /*
6432 * Use16BitSp:
6433 */
6434 if (cBitsFlat == 0)
6435 {
6436#ifdef RT_ARCH_AMD64
6437 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6438#else
6439 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6440#endif
6441 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6442 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6443 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6444 else
6445 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6446 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6447 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6448 }
6449
6450 /*
6451 * TlbMiss:
6452 *
6453 * Call helper to do the pushing.
6454 */
6455 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6456
6457#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6458 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6459#else
6460 RT_NOREF(idxInstr);
6461#endif
6462
6463 /* Save variables in volatile registers. */
6464 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6465 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6466 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6467 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6468 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6469
6470 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6471 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6472 {
6473 /* Swap them using ARG0 as temp register: */
6474 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6475 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6476 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6477 }
6478 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6479 {
6480 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6481 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6482 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6483
6484 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6485 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6486 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6487 }
6488 else
6489 {
6490 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6491 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6492
6493 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6494 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6495 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6496 }
6497
6498 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6499 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6500
6501 /* Done setting up parameters, make the call. */
6502 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6503
6504 /* Restore variables and guest shadow registers to volatile registers. */
6505 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6506 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6507
6508#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6509 if (!TlbState.fSkip)
6510 {
6511 /* end of TlbMiss - Jump to the done label. */
6512 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6513 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6514
6515 /*
6516 * TlbLookup:
6517 */
6518 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6519 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6520
6521 /*
6522 * Emit code to do the actual storing / fetching.
6523 */
6524 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6525# ifdef VBOX_WITH_STATISTICS
6526 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6527 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6528# endif
6529 if (idxRegValue != UINT8_MAX)
6530 {
6531 switch (cbMemAccess)
6532 {
6533 case 2:
6534 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6535 break;
6536 case 4:
6537 if (!fIsIntelSeg)
6538 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6539 else
6540 {
6541 /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
6542 PUSH FS in real mode, so we have to try to emulate that here.
6543 We borrow the now unused idxReg1 from the TLB lookup code for this. */
6544 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6545 kIemNativeGstReg_EFlags);
6546 if (idxRegEfl != UINT8_MAX)
6547 {
6548 #ifdef RT_ARCH_AMD64
6549 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6550 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6551 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6552#else
6553 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6554 off, TlbState.idxReg1, idxRegEfl,
6555 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6556#endif
6557 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6558 }
6559 else
6560 {
6561 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6562 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6563 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6564 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6565 }
6566 /* ASSUMES the upper half of idxRegValue is ZERO. */
6567 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6568 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6569 }
6570 break;
6571 case 8:
6572 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6573 break;
6574 default:
6575 AssertFailed();
6576 }
6577 }
6578 else
6579 {
6580 switch (cbMemAccess)
6581 {
6582 case 2:
6583 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6584 idxRegMemResult, TlbState.idxReg1);
6585 break;
6586 case 4:
6587 Assert(!fIsSegReg);
6588 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6589 idxRegMemResult, TlbState.idxReg1);
6590 break;
6591 case 8:
6592 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6593 break;
6594 default:
6595 AssertFailed();
6596 }
6597 }
6598
6599 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6600 TlbState.freeRegsAndReleaseVars(pReNative);
6601
6602 /*
6603 * TlbDone:
6604 *
6605 * Commit the new RSP value.
6606 */
6607 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6608 }
6609#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6610
6611#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6612 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6613#endif
6614 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6615 if (idxRegEffSp != idxRegRsp)
6616 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6617
6618 /* The value variable is implicitly flushed. */
6619 if (idxRegValue != UINT8_MAX)
6620 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6621 iemNativeVarFreeLocal(pReNative, idxVarValue);
6622
6623 return off;
6624}
6625
6626
6627
6628/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
6629#define IEM_MC_POP_GREG_U16(a_iGReg) \
6630 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6631 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6632#define IEM_MC_POP_GREG_U32(a_iGReg) \
6633 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6634 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6635#define IEM_MC_POP_GREG_U64(a_iGReg) \
6636 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6637 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6638
6639#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6640 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6641 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6642#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6643 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6644 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6645
6646#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6647 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6648 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6649#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6650 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6651 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
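/* The POP macros use the same cBitsVarAndFlat encoding as the PUSH macros
   above; the third byte (the segment-register flag) is always zero here. */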
6652
6653
6654DECL_FORCE_INLINE_THROW(uint32_t)
6655iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6656 uint8_t idxRegTmp)
6657{
6658 /* Use16BitSp: */
6659#ifdef RT_ARCH_AMD64
6660 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6661 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6662 RT_NOREF(idxRegTmp);
6663#else
6664 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6665 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6666 /* add tmp, regrsp, #cbMem */
6667 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6668 /* and tmp, tmp, #0xffff */
6669 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6670 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6671 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6672 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6673#endif
6674 return off;
6675}
6676
6677
6678DECL_FORCE_INLINE(uint32_t)
6679iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6680{
6681 /* Use32BitSp: */
6682 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6683 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6684 return off;
6685}
6686
6687
6688/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6689DECL_INLINE_THROW(uint32_t)
6690iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6691 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6692{
6693 /*
6694 * Assert sanity.
6695 */
6696 Assert(idxGReg < 16);
6697#ifdef VBOX_STRICT
6698 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6699 {
6700 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6701 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6702 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6703 Assert( pfnFunction
6704 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6705 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6706 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6707 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6708 : UINT64_C(0xc000b000a0009000) ));
6709 }
6710 else
6711 Assert( pfnFunction
6712 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6713 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6714 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6715 : UINT64_C(0xc000b000a0009000) ));
6716#endif
6717
6718#ifdef VBOX_STRICT
6719 /*
6720 * Check that the fExec flags we've got make sense.
6721 */
6722 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6723#endif
6724
6725 /*
6726 * To keep things simple we have to commit any pending writes first as we
6727 * may end up making calls.
6728 */
6729 off = iemNativeRegFlushPendingWrites(pReNative, off);
6730
6731 /*
6732 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
6733 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6734 * directly as the effective stack pointer.
6735 * (Code structure is very similar to that of PUSH)
6736 */
6737 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6738 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6739 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6740 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6741 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6742 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6743 * will be the resulting register value. */
6744 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6745
6746 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6747 if (cBitsFlat != 0)
6748 {
6749 Assert(idxRegEffSp == idxRegRsp);
6750 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6751 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6752 }
6753 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6754 {
6755 Assert(idxRegEffSp != idxRegRsp);
6756 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6757 kIemNativeGstRegUse_ReadOnly);
6758#ifdef RT_ARCH_AMD64
6759 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6760#else
6761 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6762#endif
6763 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6764 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6765 offFixupJumpToUseOtherBitSp = off;
6766 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6767 {
6768/** @todo can skip idxRegRsp updating when popping ESP. */
6769 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6770 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6771 }
6772 else
6773 {
6774 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6775 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6776 }
6777 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6778 }
6779 /* SpUpdateEnd: */
6780 uint32_t const offLabelSpUpdateEnd = off;
6781
6782 /*
6783 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
6784 * TlbMiss if we're skipping the lookup).
6785 */
6786 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6787 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6788 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6789 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6790 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6791 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6792 : UINT32_MAX;
6793
6794 if (!TlbState.fSkip)
6795 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6796 else
6797 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6798
6799 /*
6800 * Use16BitSp:
6801 */
6802 if (cBitsFlat == 0)
6803 {
6804#ifdef RT_ARCH_AMD64
6805 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6806#else
6807 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6808#endif
6809 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6810 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6811 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6812 else
6813 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6814 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6816 }
6817
6818 /*
6819 * TlbMiss:
6820 *
6821 * Call helper to do the pushing.
6822 */
6823 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6824
6825#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6826 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6827#else
6828 RT_NOREF(idxInstr);
6829#endif
6830
6831 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6832 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6833 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6834 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6835
6836
6837 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6838 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6839 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6840
6841 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6842 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6843
6844 /* Done setting up parameters, make the call. */
6845 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6846
6847 /* Move the return register content to idxRegMemResult. */
6848 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6849 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6850
6851 /* Restore variables and guest shadow registers to volatile registers. */
6852 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6853 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6854
6855#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6856 if (!TlbState.fSkip)
6857 {
6858 /* end of TlbMiss - Jump to the done label. */
6859 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6860 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6861
6862 /*
6863 * TlbLookup:
6864 */
6865 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6866 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6867
6868 /*
6869 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
6870 */
6871 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6872# ifdef VBOX_WITH_STATISTICS
6873 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6874 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6875# endif
6876 switch (cbMem)
6877 {
6878 case 2:
6879 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6880 break;
6881 case 4:
6882 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6883 break;
6884 case 8:
6885 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6886 break;
6887 default:
6888 AssertFailed();
6889 }
6890
6891 TlbState.freeRegsAndReleaseVars(pReNative);
6892
6893 /*
6894 * TlbDone:
6895 *
6896 * Set the new RSP value (FLAT accesses need to calculate it first) and
6897 * commit the popped register value.
6898 */
6899 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6900 }
6901#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6902
6903 if (idxGReg != X86_GREG_xSP)
6904 {
6905 /* Set the register. */
6906 if (cbMem >= sizeof(uint32_t))
6907 {
6908#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6909 AssertMsg( pReNative->idxCurCall == 0
6910 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6911 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6912#endif
6913 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6914#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6915 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6916#endif
6917#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6918 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6919 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6920#endif
6921 }
6922 else
6923 {
6924 Assert(cbMem == sizeof(uint16_t));
6925 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6926 kIemNativeGstRegUse_ForUpdate);
6927 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6928#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6929 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6930#endif
6931 iemNativeRegFreeTmp(pReNative, idxRegDst);
6932 }
6933
6934 /* Complete RSP calculation for FLAT mode. */
6935 if (idxRegEffSp == idxRegRsp)
6936 {
6937 if (cBitsFlat == 64)
6938 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
6939 else
6940 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
6941 }
6942 }
6943 else
6944 {
6945 /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
6946 if (cbMem == sizeof(uint64_t))
6947 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6948 else if (cbMem == sizeof(uint32_t))
6949 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6950 else
6951 {
6952 if (idxRegEffSp == idxRegRsp)
6953 {
6954 if (cBitsFlat == 64)
6955 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
6956 else
6957 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
6958 }
6959 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6960 }
6961 }
6962
6963#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6964 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6965#endif
6966
6967 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6968 if (idxRegEffSp != idxRegRsp)
6969 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6970 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6971
6972 return off;
6973}
6974
6975
6976
6977/*********************************************************************************************************************************
6978* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6979*********************************************************************************************************************************/
6980
6981#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6982 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6983 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6984 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6985
6986#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6987 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6988 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6989 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6990
6991#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6992 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6993 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6994 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6995
6996#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6997 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6998 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6999 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
7000
7001
7002#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7003 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7004 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7005 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
7006
7007#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7008 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7009 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7010 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
7011
7012#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7013 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7014 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7015 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7016
7017#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7018 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7019 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7020 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
7021
7022#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7023 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
7024 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7025 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7026
7027
7028#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7029 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7030 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7031 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
7032
7033#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7034 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7035 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7036 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
7037
7038#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7039 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7040 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7041 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7042
7043#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7044 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7045 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7046 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
7047
7048#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7049 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
7050 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7051 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7052
7053
7054#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7055 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7056 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7057 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
7058
7059#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7060 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7061 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7062 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
7063#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7065 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7066 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7067
7068#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7070 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7071 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
7072
7073#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7074 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
7075 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7076 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7077
7078
7079#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7080 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7081 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7082 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
7083
7084#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7085 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7086 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7087 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
7088
7089
7090#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7091 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7092 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7093 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
7094
7095#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7096 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7097 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7098 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
7099
7100#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7101 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7102 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7103 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
7104
7105#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7106 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7107 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7108 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
7109
7110
7111
7112#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7113 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7114 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
7115 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
7116
7117#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7118 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7119 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7120 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
7121
7122#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7123 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7124 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7125 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
7126
7127#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7128 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7129 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7130 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
7131
7132
7133#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7134 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7135 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7136 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
7137
7138#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7139 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7140 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7141 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
7142
7143#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7144 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7145 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7146 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7147
7148#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7149 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7150 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7151 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
7152
7153#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
7154 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
7155 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7156 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7157
7158
7159#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7160 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7161 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7162 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
7163
7164#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7165 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7166 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7167 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
7168
7169#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7170 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7171 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7172 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7173
7174#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7175 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7176 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7177 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7178
7179#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7180 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7181 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7182 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7183
7184
7185#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7186 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7187 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7188 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7189
7190#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7191 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7192 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7193 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7194
7195#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7196 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7197 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7198 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7199
7200#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7201 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7202 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7203 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7204
7205#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7206 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7207 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7208 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7209
7210
7211#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7212 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7213 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7214 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7215
7216#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7217 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7218 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7219 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7220
7221
7222#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7223 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7224 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7225 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7226
7227#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7228 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7229 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7230 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7231
7232#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7233 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7234 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7235 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7236
7237#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7238 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7239 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7240 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
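/* Note: fAlignMask follows the natural-alignment convention used throughout
   this file: sizeof(type) - 1 for the integer and SIMD types, 0 for byte
   accesses, and sizeof(uint64_t) - 1 for the 80-bit floating point and BCD
   mappings (the latter with a pending alignment @todo). */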
7241
7242
7243DECL_INLINE_THROW(uint32_t)
7244iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7245 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7246 uintptr_t pfnFunction, uint8_t idxInstr)
7247{
7248 /*
7249 * Assert sanity.
7250 */
7251 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7252 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7253 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7254 && pVarMem->cbVar == sizeof(void *),
7255 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7256
7257 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7259 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7260 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7261 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7262
7263 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7265 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7266 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7267 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7268
7269 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7270
7271 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7272
7273#ifdef VBOX_STRICT
7274# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7275 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7276 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7277 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7278 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7279# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7280 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7281 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7282 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
7283
7284 if (iSegReg == UINT8_MAX)
7285 {
7286 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7287 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7288 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7289 switch (cbMem)
7290 {
7291 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7292 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7293 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7294 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7295 case 10:
7296 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7297 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7298 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7299 break;
7300 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7301# if 0
7302 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7303 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7304# endif
7305 default: AssertFailed(); break;
7306 }
7307 }
7308 else
7309 {
7310 Assert(iSegReg < 6);
7311 switch (cbMem)
7312 {
7313 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7314 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7315 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7316 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7317 case 10:
7318 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7319 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7320 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7321 break;
7322 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7323# if 0
7324 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7325 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7326# endif
7327 default: AssertFailed(); break;
7328 }
7329 }
7330# undef IEM_MAP_HLP_FN
7331# undef IEM_MAP_HLP_FN_NO_AT
7332#endif
7333
7334#ifdef VBOX_STRICT
7335 /*
7336 * Check that the fExec flags we've got make sense.
7337 */
7338 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7339#endif
7340
7341 /*
7342 * To keep things simple we have to commit any pending writes first as we
7343 * may end up making calls.
7344 */
7345 off = iemNativeRegFlushPendingWrites(pReNative, off);
7346
7347#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7348 /*
7349 * Move/spill/flush stuff out of call-volatile registers.
7350 * This is the easy way out. We could contain this to the tlb-miss branch
7351 * by saving and restoring active stuff here.
7352 */
7353 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7354 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7355#endif
7356
7357 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7358 while the tlb-miss code path will temporarily put it on the stack.
7359 Set the type to stack here so we don't need to do it twice below. */
7360 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7361 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7362 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7363 * lookup is done. */
7364
7365 /*
7366 * Define labels and allocate the result register (trying for the return
7367 * register if we can).
7368 */
7369 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7370 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7371 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7372 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
7373 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7374 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7375 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7376 : UINT32_MAX;
7377//off=iemNativeEmitBrk(pReNative, off, 0);
7378 /*
7379 * Jump to the TLB lookup code.
7380 */
7381 if (!TlbState.fSkip)
7382 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7383
7384 /*
7385 * TlbMiss:
7386 *
7387 * Call helper to do the mapping.
7388 * We flush all guest register shadow copies here.
7389 */
7390 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7391
7392#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7393 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7394#else
7395 RT_NOREF(idxInstr);
7396#endif
7397
7398#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7399 /* Save variables in volatile registers. */
7400 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7401 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7402#endif
7403
7404 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7405 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
7406#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7407 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7408#else
7409 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7410#endif
7411
7412 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7413 if (iSegReg != UINT8_MAX)
7414 {
7415 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7416 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7417 }
7418
7419 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
7420 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7421 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7422
7423 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7424 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7425
7426 /* Done setting up parameters, make the call. */
7427 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7428
7429 /*
7430 * Put the output in the right registers.
7431 */
7432 Assert(idxRegMemResult == pVarMem->idxReg);
7433 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7434 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7435
7436#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7437 /* Restore variables and guest shadow registers to volatile registers. */
7438 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7439 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7440#endif
7441
7442 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7443 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7444
7445#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7446 if (!TlbState.fSkip)
7447 {
7448 /* End of TlbMiss - jump to the done label. */
7449 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7450 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7451
7452 /*
7453 * TlbLookup:
7454 */
7455 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7456 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7457# ifdef VBOX_WITH_STATISTICS
7458 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7459 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7460# endif
7461
7462 /* [idxVarUnmapInfo] = 0; */
7463 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7464
7465 /*
7466 * TlbDone:
7467 */
7468 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7469
7470 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7471
7472# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7473 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7474 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7475# endif
7476 }
7477#else
7478 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7479#endif
7480
7481 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7482 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7483
7484 return off;
7485}
7486
7487
7488#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7489 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7490 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7491
7492#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7493 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7494 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7495
7496#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7497 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7498 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7499
7500#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7501 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7502 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7503
7504DECL_INLINE_THROW(uint32_t)
7505iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7506 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7507{
7508 /*
7509 * Assert sanity.
7510 */
7511 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7512#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7513 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7514#endif
7515 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7516 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7517 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7518#ifdef VBOX_STRICT
7519 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7520 {
7521 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7522 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7523 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7524 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7525 case IEM_ACCESS_TYPE_WRITE:
7526 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7527 case IEM_ACCESS_TYPE_READ:
7528 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7529 default: AssertFailed();
7530 }
7531#else
7532 RT_NOREF(fAccess);
7533#endif
7534
7535 /*
7536 * To keep things simple we have to commit any pending writes first as we
7537 * may end up making calls (there shouldn't be any pending writes at this
7538 * point, so this is mostly for consistency).
7539 */
7540 /** @todo we could postpone this till we make the call and reload the
7541 * registers after returning from the call. Not sure if that's sensible or
7542 * not, though. */
7543 off = iemNativeRegFlushPendingWrites(pReNative, off);
7544
7545 /*
7546 * Move/spill/flush stuff out of call-volatile registers.
7547 *
7548 * We exclude any register holding the bUnmapInfo variable, as we'll be
7549 * checking it after returning from the call and will free it afterwards.
7550 */
7551 /** @todo save+restore active registers and maybe guest shadows in miss
7552 * scenario. */
7553 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7554 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7555
7556 /*
7557 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7558 * to call the unmap helper function.
7559 *
7560 * The likelihood of it being zero is higher than the TLB hit rate when doing
7561 * the mapping, as a TLB miss for a well aligned and unproblematic memory
7562 * access should also end up with a mapping that won't need special unmapping.
7563 */
7564 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7565 * should speed up things for the pure interpreter as well when TLBs
7566 * are enabled. */
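    /*
     * Roughly, the code emitted below behaves like this C sketch at runtime
     * (illustrative only; bUnmapInfo stands for the current value of the variable):
     *
     *      if (bUnmapInfo != 0)
     *          pfnFunction(pVCpu, bUnmapInfo); // one of the iemNativeHlpMemCommitAndUnmap* helpers
     */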
7567#ifdef RT_ARCH_AMD64
7568 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7569 {
7570 /* test byte [rbp - xxx], 0ffh */
7571 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7572 pbCodeBuf[off++] = 0xf6;
7573 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7574 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7575 pbCodeBuf[off++] = 0xff;
7576 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7577 }
7578 else
7579#endif
7580 {
7581 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7582 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7583 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7584 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7585 }
7586 uint32_t const offJmpFixup = off;
7587 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
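    /* Note: the jz above targets a placeholder; it is patched by the
       iemNativeFixupFixedJump call at the end of this function once the
       offset following the helper call is known. */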
7588
7589 /*
7590 * Call the unmap helper function.
7591 */
7592#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7593 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7594#else
7595 RT_NOREF(idxInstr);
7596#endif
7597
7598 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7599 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7600 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7601
7602 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7603 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7604
7605 /* Done setting up parameters, make the call. */
7606 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7607
7608 /* The bUnmapInfo variable is implicitly freed by these MCs. */
7609 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7610
7611 /*
7612 * Done, just fixup the jump for the non-call case.
7613 */
7614 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7615
7616 return off;
7617}
7618
7619
7620
7621/*********************************************************************************************************************************
7622* State and Exceptions *
7623*********************************************************************************************************************************/
7624
7625#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7626#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7627
7628#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7629#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7630#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7631
7632#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7633#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7634#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7635
7636
7637DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7638{
7639 /** @todo this needs a lot more work later. */
7640 RT_NOREF(pReNative, fForChange);
7641 return off;
7642}
7643
7644
7645
7646/*********************************************************************************************************************************
7647* Emitters for FPU related operations. *
7648*********************************************************************************************************************************/
7649
7650#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7651 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7652
7653/** Emits code for IEM_MC_FETCH_FCW. */
7654DECL_INLINE_THROW(uint32_t)
7655iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7656{
7657 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7658 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7659
7660 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7661
7662 /* Allocate a temporary FCW register. */
7663 /** @todo eliminate extra register */
7664 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7665 kIemNativeGstRegUse_ReadOnly);
7666
7667 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7668
7669 /* Free but don't flush the FCW register. */
7670 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7671 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7672
7673 return off;
7674}
7675
7676
7677#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7678 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7679
7680/** Emits code for IEM_MC_FETCH_FSW. */
7681DECL_INLINE_THROW(uint32_t)
7682iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7683{
7684 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7685 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7686
7687 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7688 /* Allocate a temporary FSW register. */
7689 /** @todo eliminate extra register */
7690 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7691 kIemNativeGstRegUse_ReadOnly);
7692
7693 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7694
7695 /* Free but don't flush the FSW register. */
7696 iemNativeRegFreeTmp(pReNative, idxFswReg);
7697 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7698
7699 return off;
7700}
7701
7702
7703
7704#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7705
7706
7707/*********************************************************************************************************************************
7708* Emitters for SSE/AVX specific operations. *
7709*********************************************************************************************************************************/
7710
7711#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7712 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7713
7714/** Emits code for IEM_MC_COPY_XREG_U128. */
7715DECL_INLINE_THROW(uint32_t)
7716iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7717{
7718 /* This is a nop if the source and destination register are the same. */
7719 if (iXRegDst != iXRegSrc)
7720 {
7721 /* Allocate destination and source register. */
7722 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7723 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7724 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7725 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7726
7727 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7728
7729 /* Free but don't flush the source and destination register. */
7730 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7731 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7732 }
7733
7734 return off;
7735}
7736
7737
7738#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7739 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7740
7741/** Emits code for IEM_MC_FETCH_XREG_U128. */
7742DECL_INLINE_THROW(uint32_t)
7743iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7744{
7745 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7746 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7747
7748 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7749 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7750
7751 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7752
7753 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7754
7755 /* Free but don't flush the source register. */
7756 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7757 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7758
7759 return off;
7760}
7761
7762
7763#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7764 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7765
7766#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
7767 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
7768
7769 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
7770DECL_INLINE_THROW(uint32_t)
7771iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7772{
7773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7775
7776 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7777 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7778
7779 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7780 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7781
7782 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7783
7784 /* Free but don't flush the source register. */
7785 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7786 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7787
7788 return off;
7789}
7790
7791
7792#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7793 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7794
7795#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
7796 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
7797
7798/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
7799DECL_INLINE_THROW(uint32_t)
7800iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7801{
7802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7803 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7804
7805 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7806 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7807
7808 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7809 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7810
7811 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7812
7813 /* Free but don't flush the source register. */
7814 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7815 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7816
7817 return off;
7818}
7819
7820
7821#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
7822 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
7823
7824/** Emits code for IEM_MC_FETCH_XREG_U16. */
7825DECL_INLINE_THROW(uint32_t)
7826iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7827{
7828 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7829 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7830
7831 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7832 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7833
7834 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7835 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7836
7837 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7838
7839 /* Free but don't flush the source register. */
7840 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7841 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7842
7843 return off;
7844}
7845
7846
7847#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
7848 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
7849
7850/** Emits code for IEM_MC_FETCH_XREG_U8. */
7851DECL_INLINE_THROW(uint32_t)
7852iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7853{
7854 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7855 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7856
7857 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7858 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7859
7860 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7861 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7862
7863 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7864
7865 /* Free but don't flush the source register. */
7866 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7867 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7868
7869 return off;
7870}
7871
7872
7873#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7874 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7875
7876AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7877#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7878 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7879
7880
7881/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7882DECL_INLINE_THROW(uint32_t)
7883iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7884{
7885 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7886 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7887
7888 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7889 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7890 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7891
7892 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7893
7894 /* Free but don't flush the source register. */
7895 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7896 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7897
7898 return off;
7899}
7900
7901
7902#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7903 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
7904
7905#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7906 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
7907
7908#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
7909 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
7910
7911#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
7912 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
7913
7914#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
7915 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
7916
7917#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
7918 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
7919
7920 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
7921DECL_INLINE_THROW(uint32_t)
7922iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
7923 uint8_t cbLocal, uint8_t iElem)
7924{
7925 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7926 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
7927
7928#ifdef VBOX_STRICT
7929 switch (cbLocal)
7930 {
7931 case sizeof(uint64_t): Assert(iElem < 2); break;
7932 case sizeof(uint32_t): Assert(iElem < 4); break;
7933 case sizeof(uint16_t): Assert(iElem < 8); break;
7934 case sizeof(uint8_t): Assert(iElem < 16); break;
7935 default: AssertFailed();
7936 }
7937#endif
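    /* (An XMM register is 16 bytes wide, so the valid element index range is [0, 16 / cbLocal).) */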
7938
7939 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7940 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7941 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7942
7943 switch (cbLocal)
7944 {
7945 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7946 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7947 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7948 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7949 default: AssertFailed();
7950 }
7951
7952 /* Free but don't flush the source register. */
7953 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7954 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7955
7956 return off;
7957}
7958
7959
7960#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7961 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7962
7963/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7964DECL_INLINE_THROW(uint32_t)
7965iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7966{
7967 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7968 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7969
7970 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7971 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7972 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7973
7974 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
7975 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7976 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7977
7978 /* Free but don't flush the source register. */
7979 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7980 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7981
7982 return off;
7983}
7984
7985
7986#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7987 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7988
7989/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7990DECL_INLINE_THROW(uint32_t)
7991iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7992{
7993 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7994 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7995
7996 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7997 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7998 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7999
8000 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
8001 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
8002 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
8003
8004 /* Free but don't flush the source register. */
8005 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8006 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8007
8008 return off;
8009}
8010
8011
8012#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
8013 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
8014
8015/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
8016DECL_INLINE_THROW(uint32_t)
8017iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
8018 uint8_t idxSrcVar, uint8_t iDwSrc)
8019{
8020 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8021 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8022
8023 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8024 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8025 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8026
8027 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
8028 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
8029
8030 /* Free but don't flush the destination register. */
8031 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8032 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8033
8034 return off;
8035}
8036
8037
8038#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8039 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8040
8041/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
8042DECL_INLINE_THROW(uint32_t)
8043iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8044{
8045 /*
8046 * The iYRegSrc == iYRegDst case needs special treatment here: if iYRegDst gets allocated first for the full write,
8047 * its actual value won't be loaded from CPUMCTX. Allocating iYRegSrc afterwards would then duplicate the already
8048 * allocated host register for iYRegDst, which contains garbage. This is caught by the guest register value checking in debug builds.
8049 */
8050 if (iYRegDst != iYRegSrc)
8051 {
8052 /* Allocate destination and source register. */
8053 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8054 kIemNativeGstSimdRegLdStSz_256,
8055 kIemNativeGstRegUse_ForFullWrite);
8056 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8057 kIemNativeGstSimdRegLdStSz_Low128,
8058 kIemNativeGstRegUse_ReadOnly);
8059
8060 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8061 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8062
8063 /* Free but don't flush the source and destination register. */
8064 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8065 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8066 }
8067 else
8068 {
8069 /* This effectively only clears the upper 128-bits of the register. */
8070 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8071 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8072
8073 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8074
8075 /* Free but don't flush the destination register. */
8076 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8077 }
8078
8079 return off;
8080}
8081
8082
8083#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8084 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8085
8086/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
8087DECL_INLINE_THROW(uint32_t)
8088iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8089{
8090 /*
8091 * The iYRegSrc == iYRegDst case needs special treatment here: if iYRegDst gets allocated first for the full write,
8092 * its actual value won't be loaded from CPUMCTX. Allocating iYRegSrc afterwards would then duplicate the already
8093 * allocated host register for iYRegDst, which contains garbage. This is caught by the guest register value checking in debug builds.
8094 * For iYRegSrc == iYRegDst the operation would effectively only clear the upper 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
8095 */
8096 if (iYRegDst != iYRegSrc)
8097 {
8098 /* Allocate destination and source register. */
8099 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8100 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8101 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8102 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8103
8104 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8105
8106 /* Free but don't flush the source and destination register. */
8107 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8108 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8109 }
8110
8111 return off;
8112}
8113
8114
8115#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
8116 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
8117
8118/** Emits code for IEM_MC_FETCH_YREG_U128. */
8119DECL_INLINE_THROW(uint32_t)
8120iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
8121{
8122 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8123 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8124
8125 Assert(iDQWord <= 1);
8126 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8127 iDQWord == 1
8128 ? kIemNativeGstSimdRegLdStSz_High128
8129 : kIemNativeGstSimdRegLdStSz_Low128,
8130 kIemNativeGstRegUse_ReadOnly);
8131
8132 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8133 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8134
8135 if (iDQWord == 1)
8136 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8137 else
8138 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8139
8140 /* Free but don't flush the source register. */
8141 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8142 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8143
8144 return off;
8145}
8146
8147
8148#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
8149 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
8150
8151/** Emits code for IEM_MC_FETCH_YREG_U64. */
8152DECL_INLINE_THROW(uint32_t)
8153iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
8154{
8155 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8156 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8157
8158 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8159 iQWord >= 2
8160 ? kIemNativeGstSimdRegLdStSz_High128
8161 : kIemNativeGstSimdRegLdStSz_Low128,
8162 kIemNativeGstRegUse_ReadOnly);
8163
8164 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8165 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8166
8167 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8168
8169 /* Free but don't flush the source register. */
8170 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8171 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8172
8173 return off;
8174}
8175
8176
8177#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
8178 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
8179
8180/** Emits code for IEM_MC_FETCH_YREG_U32. */
8181DECL_INLINE_THROW(uint32_t)
8182iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
8183{
8184 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8185 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8186
8187 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8188 iDWord >= 4
8189 ? kIemNativeGstSimdRegLdStSz_High128
8190 : kIemNativeGstSimdRegLdStSz_Low128,
8191 kIemNativeGstRegUse_ReadOnly);
8192
8193 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8194 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8195
8196 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8197
8198 /* Free but don't flush the source register. */
8199 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8200 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8201
8202 return off;
8203}
8204
8205
8206#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8207 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8208
8209/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8210DECL_INLINE_THROW(uint32_t)
8211iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8212{
8213 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8214 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8215
8216 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8217
8218 /* Free but don't flush the register. */
8219 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8220
8221 return off;
8222}
8223
8224
8225#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8226 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8227
8228/** Emits code for IEM_MC_STORE_YREG_U128. */
8229DECL_INLINE_THROW(uint32_t)
8230iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8231{
8232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8233 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8234
8235 Assert(iDQword <= 1);
8236 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8237 iDQword == 0
8238 ? kIemNativeGstSimdRegLdStSz_Low128
8239 : kIemNativeGstSimdRegLdStSz_High128,
8240 kIemNativeGstRegUse_ForFullWrite);
8241
8242 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8243
8244 if (iDQword == 0)
8245 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8246 else
8247 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8248
8249 /* Free but don't flush the source register. */
8250 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8251 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8252
8253 return off;
8254}
8255
8256
8257#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8258 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8259
8260/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8261DECL_INLINE_THROW(uint32_t)
8262iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8263{
8264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8265 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8266
8267 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8268 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8269
8270 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8271
8272 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8273 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8274
8275 /* Free but don't flush the source register. */
8276 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8277 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8278
8279 return off;
8280}
8281
8282
8283#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8284 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8285
8286/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8287DECL_INLINE_THROW(uint32_t)
8288iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8289{
8290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8291 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8292
8293 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8294 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8295
8296 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8297
8298 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8299 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8300
8301 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8302 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8303
8304 return off;
8305}
8306
8307
8308#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8309 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8310
8311/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8312DECL_INLINE_THROW(uint32_t)
8313iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8314{
8315 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8316 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8317
8318 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8319 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8320
8321 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8322
8323 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8324 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8325
8326 /* Free but don't flush the source register. */
8327 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8328 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8329
8330 return off;
8331}
8332
8333
8334#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8335 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8336
8337/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8338DECL_INLINE_THROW(uint32_t)
8339iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8340{
8341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8342 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8343
8344 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8345 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8346
8347 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8348
8349 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8350 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8351
8352 /* Free but don't flush the source register. */
8353 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8354 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8355
8356 return off;
8357}
8358
8359
8360#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8361 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8362
8363/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8364DECL_INLINE_THROW(uint32_t)
8365iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8366{
8367 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8368 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8369
8370 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8371 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8372
8373 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8374
8375 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8376 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8377
8378 /* Free but don't flush the source register. */
8379 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8380 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8381
8382 return off;
8383}
8384
8385
8386#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8387 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8388
8389/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8390DECL_INLINE_THROW(uint32_t)
8391iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8392{
8393 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8394 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8395
8396 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8397 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8398
8399 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8400
8401 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8402
8403 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8404 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8405
8406 return off;
8407}
8408
8409
8410#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8411 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8412
8413/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8414DECL_INLINE_THROW(uint32_t)
8415iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8416{
8417 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8418 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8419
8420 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8421 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8422
8423 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8424
8425 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8426
8427 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8428 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8429
8430 return off;
8431}
8432
8433
8434#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8435 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8436
8437/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8438DECL_INLINE_THROW(uint32_t)
8439iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8440{
8441 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8442 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8443
8444 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8445 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8446
8447 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8448
8449 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8450
8451 /* Free but don't flush the source register. */
8452 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8453 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8454
8455 return off;
8456}
8457
8458
8459#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8460 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8461
8462/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8463DECL_INLINE_THROW(uint32_t)
8464iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8465{
8466 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8467 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8468
8469 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8470 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8471
8472 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8473
8474 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8475
8476 /* Free but don't flush the source register. */
8477 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8478 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8479
8480 return off;
8481}
8482
8483
8484#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8485 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8486
8487/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8488DECL_INLINE_THROW(uint32_t)
8489iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8490{
8491 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8492 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8493
8494 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8495 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8496
8497 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8498
8499 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8500
8501 /* Free but don't flush the source register. */
8502 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8503 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8504
8505 return off;
8506}
8507
8508
8509#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8510 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8511
8512/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8513DECL_INLINE_THROW(uint32_t)
8514iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8515{
8516 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8517 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8518
8519 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8520 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8521
8522 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8523
8524 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8525 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8526
8527 /* Free but don't flush the source register. */
8528 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8529 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8530
8531 return off;
8532}
8533
8534
8535#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8536 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8537
8538/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8539DECL_INLINE_THROW(uint32_t)
8540iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8541{
8542 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8543 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8544
8545 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8546 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8547
8548 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8549
8550 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8551 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8552
8553 /* Free but don't flush the source register. */
8554 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8555 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8556
8557 return off;
8558}
8559
8560
8561#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8562 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8563
8564/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8565DECL_INLINE_THROW(uint32_t)
8566iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8567{
8568 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8569 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8570
8571 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8572 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8573 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8574 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8575 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8576
8577 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8578 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8579 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8580
8581 /* Free but don't flush the source and destination registers. */
8582 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8583 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8584 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8585
8586 return off;
8587}
8588
8589
8590#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8591 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8592
8593/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8594DECL_INLINE_THROW(uint32_t)
8595iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8596{
8597 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8598 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8599
8600 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8601 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8602 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8603 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8604 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8605
8606 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8607 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8608 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8609
8610 /* Free but don't flush the source and destination registers. */
8611 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8612 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8613 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8614
8615 return off;
8616}


#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
    off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)


/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
{
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);

    /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
    if (bImm8Mask & RT_BIT(0))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
    if (bImm8Mask & RT_BIT(1))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
    if (bImm8Mask & RT_BIT(2))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
    if (bImm8Mask & RT_BIT(3))
        off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);

    return off;
}
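
/*
 * Illustration (not generated code): with a_bMask = 0x5 the emitter above
 * produces code that zeroes dwords 0 and 2 of the selected XMM register and
 * leaves dwords 1 and 3 untouched; each set bit in the mask selects one
 * 32-bit element to clear.
 */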


#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
    off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)


/** Emits code for IEM_MC_FETCH_YREG_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);

    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);

    /* Free but don't flush the source register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);

    return off;
}


#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
    off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)


/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);

    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);

    /* Free but don't flush the source register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}
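
/*
 * Note: unlike the 128-bit ZX_VLMAX emitters, no explicit zeroing of the upper
 * half is required here; the destination is allocated for a full 256-bit write
 * and all 256 bits are copied from the source variable, so the zero extension
 * is implicit.
 */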


#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
    off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)


/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
                                      uint8_t idxSrcVar, uint8_t iDwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iDwDst < 4
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);

    /* Free but don't flush the source register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}
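
/*
 * Note: the dword is moved through a temporary GPR rather than with a
 * vector-to-vector insert, and only the 128-bit half of the destination that
 * contains a_iDwDst is loaded and marked for update; the other half is not
 * touched.
 */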


#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
    off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)


/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
                                      uint8_t idxSrcVar, uint8_t iQwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);

    /* Free but don't flush the source register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)


/** Emits code for IEM_MC_STORE_YREG_U64. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);

    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);

    /* Free but don't flush the source register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}
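
/*
 * Note: IEM_MC_STORE_YREG_U64 has no zero-extension semantics; only the
 * selected qword is overwritten (ForUpdate) and the remaining bits of the
 * register are left as they are.
 */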


#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)

/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
{
    RT_NOREF(pReNative, iYReg);
    /** @todo Needs to be implemented when support for AVX-512 is added. */
    return off;
}


#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg) \
    off = iemNativeEmitSimdSseStoreResult(pReNative, off, a_SseData, a_iXmmReg)

/** Emits code for IEM_MC_STORE_SSE_RESULT. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdSseStoreResult(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSseRes, uint8_t iXReg)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSseRes);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSseRes, sizeof(X86XMMREG));

    /* The ForUpdate is important as we might end up not writing the result value to the register in case of an unmasked exception. */
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegRes = iemNativeVarSimdRegisterAcquire(pReNative, idxSseRes, &off, true /*fInitalized*/);
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    /* Update the value if there is no unmasked exception. */
    /* tmp = mxcsr */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
    /* tmp &= X86_MXCSR_XCPT_MASK */
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
    /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
    /* tmp = ~tmp */
    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
    /* tmp &= mxcsr */
    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);

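    /* At this point idxRegTmp holds MXCSR with the masked exception flags
       removed: the mask bits were shifted down onto the flag bits, inverted
       and ANDed back into MXCSR. If any of X86_MXCSR_XCPT_FLAGS survive, an
       unmasked exception is pending and the JNZ below skips committing the
       result; e.g. with the power-up MXCSR value of 0x1f80 (all exceptions
       masked) the store always happens. */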
    off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
    uint32_t offFixup = off;
    off = iemNativeEmitJnzToFixed(pReNative, off, off);
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarRegRes);
    iemNativeFixupFixedJump(pReNative, offFixup, off);

    /* Free but don't flush the shadowed register. */
    iemNativeVarRegisterRelease(pReNative, idxSseRes);
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);

    return off;
}


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_SSE_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
{
    /* Grab the MXCSR register, it must not be call volatile or we end up freeing it when setting up the call below. */
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));

    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);

    /*
     * Load the MXCSR register into the first argument and mask out the current exception flags.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);

    /*
     * The updated MXCSR is in the return register.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);

#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
#endif
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);

    return off;
}
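
/*
 * Calling convention implemented by the worker above (sketch): the helper gets
 * the guest MXCSR with the exception flag bits cleared as its first, hidden
 * argument, followed by the IEM_MC_ARG operands, and returns the updated MXCSR
 * in the regular return register. A helper prototype therefore looks roughly
 * like this (hypothetical illustration only, not an actual declaration from
 * this tree):
 *
 *     uint32_t iemAImpl_someSseOp_u128(uint32_t fMxCsrIn, PX86XMMREG puDst, PCX86XMMREG puSrc);
 *
 * with any newly raised exception flags set in the returned value.
 */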


#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}
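
/*
 * Note: the asserts above encode the argument numbering convention for these
 * calls: the a0..a2 operands must be argument variables whose argument numbers
 * follow the IEM_SSE_AIMPL_HIDDEN_ARGS hidden argument(s), i.e. the MXCSR
 * value that the common worker loads into the first call register.
 */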


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}
#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */


/*********************************************************************************************************************************
*   Include instruction emitters.                                                                                                *
*********************************************************************************************************************************/
#include "target-x86/IEMAllN8veEmit-x86.h"
