VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 104361

Last change on this file since 104361 was 104357, checked in by vboxsync, 10 months ago

VMM/IEM: Adding a TB lookup table to each TB, so we can cache which TB comes next on branches and other TB exits/completion. bugref:10656

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 430.0 KB
Line 
1/* $Id: IEMAllN8veRecompFuncs.h 104357 2024-04-17 21:57:03Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs here.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value, they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * a MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it hold pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If its not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it probably will drop into the
310 * debugger, so not worth the effort of trying to services it here and we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * immediate in AND operation), we always update the flags and skip the
315 * extra check associated conditional jump.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
346
347/** The VINF_SUCCESS dummy. */
348template<int const a_rcNormal>
349DECL_FORCE_INLINE(uint32_t)
350iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
351{
352 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
353 if (a_rcNormal != VINF_SUCCESS)
354 {
355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
357#else
358 RT_NOREF_PV(pCallEntry);
359#endif
360
361 /* As this code returns from the TB any pending register writes must be flushed. */
362 off = iemNativeRegFlushPendingWrites(pReNative, off);
363
364 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
365 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
366 Assert(idxTbLookupFirst < pReNative->pTbOrg->cTbLookupEntries);
367 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pReNative->pTbOrg, idxTbLookupFirst);
368 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
369 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
370 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
371
372 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
373 }
374 return off;
375}
376
377
378#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
379 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
380 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
381
382#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
383 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
384 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
385 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
386
387/** Same as iemRegAddToRip64AndFinishingNoFlags. */
388DECL_INLINE_THROW(uint32_t)
389iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
390{
391#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
392# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
393 if (!pReNative->Core.offPc)
394 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395# endif
396
397 /* Allocate a temporary PC register. */
398 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
399
400 /* Perform the addition and store the result. */
401 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
402 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
403
404 /* Free but don't flush the PC register. */
405 iemNativeRegFreeTmp(pReNative, idxPcReg);
406#endif
407
408#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
409 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
410
411 pReNative->Core.offPc += cbInstr;
412# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
413 off = iemNativePcAdjustCheck(pReNative, off);
414# endif
415 if (pReNative->cCondDepth)
416 off = iemNativeEmitPcWriteback(pReNative, off);
417 else
418 pReNative->Core.cInstrPcUpdateSkipped++;
419#endif
420
421 return off;
422}
423
424
425#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
426 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
427 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
428
429#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
430 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
431 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
432 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
433
434/** Same as iemRegAddToEip32AndFinishingNoFlags. */
435DECL_INLINE_THROW(uint32_t)
436iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
437{
438#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
439# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
440 if (!pReNative->Core.offPc)
441 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
442# endif
443
444 /* Allocate a temporary PC register. */
445 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
446
447 /* Perform the addition and store the result. */
448 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
449 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
450
451 /* Free but don't flush the PC register. */
452 iemNativeRegFreeTmp(pReNative, idxPcReg);
453#endif
454
455#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
456 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
457
458 pReNative->Core.offPc += cbInstr;
459# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
460 off = iemNativePcAdjustCheck(pReNative, off);
461# endif
462 if (pReNative->cCondDepth)
463 off = iemNativeEmitPcWriteback(pReNative, off);
464 else
465 pReNative->Core.cInstrPcUpdateSkipped++;
466#endif
467
468 return off;
469}
470
471
472#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
473 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
474 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
475
476#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
477 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
479 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
480
481/** Same as iemRegAddToIp16AndFinishingNoFlags. */
482DECL_INLINE_THROW(uint32_t)
483iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
484{
485#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
486# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
487 if (!pReNative->Core.offPc)
488 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
489# endif
490
491 /* Allocate a temporary PC register. */
492 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
493
494 /* Perform the addition and store the result. */
495 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
496 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
497 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
498
499 /* Free but don't flush the PC register. */
500 iemNativeRegFreeTmp(pReNative, idxPcReg);
501#endif
502
503#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
504 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
505
506 pReNative->Core.offPc += cbInstr;
507# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
508 off = iemNativePcAdjustCheck(pReNative, off);
509# endif
510 if (pReNative->cCondDepth)
511 off = iemNativeEmitPcWriteback(pReNative, off);
512 else
513 pReNative->Core.cInstrPcUpdateSkipped++;
514#endif
515
516 return off;
517}
518
519
520
521/*********************************************************************************************************************************
522* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
523*********************************************************************************************************************************/
524
525#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
526 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
527 (a_enmEffOpSize), pCallEntry->idxInstr); \
528 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
529
530#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
531 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
532 (a_enmEffOpSize), pCallEntry->idxInstr); \
533 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
534 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
535
536#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
537 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
538 IEMMODE_16BIT, pCallEntry->idxInstr); \
539 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
540
541#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
542 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
543 IEMMODE_16BIT, pCallEntry->idxInstr); \
544 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
545 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
546
547#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
548 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
549 IEMMODE_64BIT, pCallEntry->idxInstr); \
550 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
551
552#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
553 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
554 IEMMODE_64BIT, pCallEntry->idxInstr); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
557
558/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
559 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
560 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
561DECL_INLINE_THROW(uint32_t)
562iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
563 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
564{
565 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
566
567 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
568 off = iemNativeRegFlushPendingWrites(pReNative, off);
569
570#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
571 Assert(pReNative->Core.offPc == 0);
572
573 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
574#endif
575
576 /* Allocate a temporary PC register. */
577 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
578
579 /* Perform the addition. */
580 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
581
582 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
583 {
584 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
585 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
586 }
587 else
588 {
589 /* Just truncate the result to 16-bit IP. */
590 Assert(enmEffOpSize == IEMMODE_16BIT);
591 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
592 }
593 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
594
595 /* Free but don't flush the PC register. */
596 iemNativeRegFreeTmp(pReNative, idxPcReg);
597
598 return off;
599}
600
601
602#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
603 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
604 (a_enmEffOpSize), pCallEntry->idxInstr); \
605 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
606
607#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
608 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
609 (a_enmEffOpSize), pCallEntry->idxInstr); \
610 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
611 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
612
613#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
614 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
615 IEMMODE_16BIT, pCallEntry->idxInstr); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
617
618#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
620 IEMMODE_16BIT, pCallEntry->idxInstr); \
621 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
622 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
623
624#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
625 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
626 IEMMODE_32BIT, pCallEntry->idxInstr); \
627 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
628
629#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
630 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
631 IEMMODE_32BIT, pCallEntry->idxInstr); \
632 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
633 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
634
635/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
636 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
637 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
638DECL_INLINE_THROW(uint32_t)
639iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
640 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
641{
642 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
643
644 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
645 off = iemNativeRegFlushPendingWrites(pReNative, off);
646
647#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
648 Assert(pReNative->Core.offPc == 0);
649
650 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
651#endif
652
653 /* Allocate a temporary PC register. */
654 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
655
656 /* Perform the addition. */
657 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
658
659 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
660 if (enmEffOpSize == IEMMODE_16BIT)
661 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
662
663 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
664/** @todo we can skip this in 32-bit FLAT mode. */
665 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
666
667 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
668
669 /* Free but don't flush the PC register. */
670 iemNativeRegFreeTmp(pReNative, idxPcReg);
671
672 return off;
673}
674
675
676#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
677 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
678 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
679
680#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
681 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
682 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
683 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
684
685#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
686 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
687 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
688
689#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
690 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
691 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
692 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
693
694#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
695 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
697
698#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
699 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
700 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
701 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
702
703/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
704DECL_INLINE_THROW(uint32_t)
705iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
706 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
707{
708 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
709 off = iemNativeRegFlushPendingWrites(pReNative, off);
710
711#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
712 Assert(pReNative->Core.offPc == 0);
713
714 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
715#endif
716
717 /* Allocate a temporary PC register. */
718 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
719
720 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
721 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
722 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
723 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
724 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
725
726 /* Free but don't flush the PC register. */
727 iemNativeRegFreeTmp(pReNative, idxPcReg);
728
729 return off;
730}
731
732
733
734/*********************************************************************************************************************************
735* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
736*********************************************************************************************************************************/
737
738/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
739#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
740 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
741
742/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
743#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
744 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
745
746/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
747#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
748 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
749
750/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
751 * clears flags. */
752#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
753 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
754 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
755
756/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
757 * clears flags. */
758#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
759 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
760 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
761
762/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
763 * clears flags. */
764#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
765 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
766 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
767
768#undef IEM_MC_SET_RIP_U16_AND_FINISH
769
770
771/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
772#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
773 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
774
775/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
776#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
777 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
778
779/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
780 * clears flags. */
781#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
782 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
783 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
784
785/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
786 * and clears flags. */
787#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
788 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
789 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
790
791#undef IEM_MC_SET_RIP_U32_AND_FINISH
792
793
794/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
795#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
796 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
797
798/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
799 * and clears flags. */
800#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
801 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
802 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
803
804#undef IEM_MC_SET_RIP_U64_AND_FINISH
805
806
807/** Same as iemRegRipJumpU16AndFinishNoFlags,
808 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
811 uint8_t idxInstr, uint8_t cbVar)
812{
813 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
814 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
815
816 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
817 off = iemNativeRegFlushPendingWrites(pReNative, off);
818
819#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
820 Assert(pReNative->Core.offPc == 0);
821
822 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
823#endif
824
825 /* Get a register with the new PC loaded from idxVarPc.
826 Note! This ASSUMES that the high bits of the GPR is zeroed. */
827 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
828
829 /* Check limit (may #GP(0) + exit TB). */
830 if (!f64Bit)
831/** @todo we can skip this test in FLAT 32-bit mode. */
832 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
833 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
834 else if (cbVar > sizeof(uint32_t))
835 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
836
837 /* Store the result. */
838 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
839
840 iemNativeVarRegisterRelease(pReNative, idxVarPc);
841 /** @todo implictly free the variable? */
842
843 return off;
844}
845
846
847
848/*********************************************************************************************************************************
849* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
850*********************************************************************************************************************************/
851
852#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
853 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
854
855/**
856 * Emits code to check if a \#NM exception should be raised.
857 *
858 * @returns New code buffer offset, UINT32_MAX on failure.
859 * @param pReNative The native recompile state.
860 * @param off The code buffer offset.
861 * @param idxInstr The current instruction.
862 */
863DECL_INLINE_THROW(uint32_t)
864iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
865{
866#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
867 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
868
869 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
870 {
871#endif
872 /*
873 * Make sure we don't have any outstanding guest register writes as we may
874 * raise an #NM and all guest register must be up to date in CPUMCTX.
875 */
876 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
877 off = iemNativeRegFlushPendingWrites(pReNative, off);
878
879#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
880 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
881#else
882 RT_NOREF(idxInstr);
883#endif
884
885 /* Allocate a temporary CR0 register. */
886 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
887 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
888
889 /*
890 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
891 * return raisexcpt();
892 */
893 /* Test and jump. */
894 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
895
896 /* Free but don't flush the CR0 register. */
897 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
898
899#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
900 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
901 }
902 else
903 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
904#endif
905
906 return off;
907}
908
909
910#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
911 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
912
913/**
914 * Emits code to check if a \#NM exception should be raised.
915 *
916 * @returns New code buffer offset, UINT32_MAX on failure.
917 * @param pReNative The native recompile state.
918 * @param off The code buffer offset.
919 * @param idxInstr The current instruction.
920 */
921DECL_INLINE_THROW(uint32_t)
922iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
923{
924#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
925 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
926
927 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
928 {
929#endif
930 /*
931 * Make sure we don't have any outstanding guest register writes as we may
932 * raise an #NM and all guest register must be up to date in CPUMCTX.
933 */
934 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
935 off = iemNativeRegFlushPendingWrites(pReNative, off);
936
937#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
938 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
939#else
940 RT_NOREF(idxInstr);
941#endif
942
943 /* Allocate a temporary CR0 register. */
944 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_Calculation);
945
946 /*
947 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
948 * return raisexcpt();
949 */
950 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
951 /* Test and jump. */
952 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS, kIemNativeLabelType_RaiseNm);
953
954 /* Free the CR0 register. */
955 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
956
957#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
958 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
959 }
960 else
961 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
962#endif
963
964 return off;
965}
966
967
968#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
969 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
970
971/**
972 * Emits code to check if a \#MF exception should be raised.
973 *
974 * @returns New code buffer offset, UINT32_MAX on failure.
975 * @param pReNative The native recompile state.
976 * @param off The code buffer offset.
977 * @param idxInstr The current instruction.
978 */
979DECL_INLINE_THROW(uint32_t)
980iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
981{
982 /*
983 * Make sure we don't have any outstanding guest register writes as we may
984 * raise an #MF and all guest register must be up to date in CPUMCTX.
985 */
986 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
987 off = iemNativeRegFlushPendingWrites(pReNative, off);
988
989#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
990 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
991#else
992 RT_NOREF(idxInstr);
993#endif
994
995 /* Allocate a temporary FSW register. */
996 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
997 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
998
999 /*
1000 * if (FSW & X86_FSW_ES != 0)
1001 * return raisexcpt();
1002 */
1003 /* Test and jump. */
1004 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, idxLabelRaiseMf);
1005
1006 /* Free but don't flush the FSW register. */
1007 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
1008
1009 return off;
1010}
1011
1012
1013#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
1014 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1015
1016/**
1017 * Emits code to check if a SSE exception (either \#UD or \#NM) should be raised.
1018 *
1019 * @returns New code buffer offset, UINT32_MAX on failure.
1020 * @param pReNative The native recompile state.
1021 * @param off The code buffer offset.
1022 * @param idxInstr The current instruction.
1023 */
1024DECL_INLINE_THROW(uint32_t)
1025iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1026{
1027#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1028 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
1029
1030 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
1031 {
1032#endif
1033 /*
1034 * Make sure we don't have any outstanding guest register writes as we may
1035 * raise an \#UD or \#NM and all guest register must be up to date in CPUMCTX.
1036 */
1037 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1038 off = iemNativeRegFlushPendingWrites(pReNative, off);
1039
1040#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1041 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1042#else
1043 RT_NOREF(idxInstr);
1044#endif
1045
1046 /* Allocate a temporary CR0 and CR4 register. */
1047 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
1048 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1049 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1050 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1051
1052 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
1053#ifdef RT_ARCH_AMD64
1054 /*
1055 * We do a modified test here:
1056 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
1057 * else { goto RaiseSseRelated; }
1058 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
1059 * all targets except the 386, which doesn't support SSE, this should
1060 * be a safe assumption.
1061 */
1062 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
1063 //pCodeBuf[off++] = 0xcc;
1064 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
1065 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
1066 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
1067 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
1068 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
1069 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
1070
1071#elif defined(RT_ARCH_ARM64)
1072 /*
1073 * We do a modified test here:
1074 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
1075 * else { goto RaiseSseRelated; }
1076 */
1077 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
1078 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1079 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
1080 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
1081 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
1082 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1083 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1084 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
1085 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1086 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1087 idxLabelRaiseSseRelated);
1088
1089#else
1090# error "Port me!"
1091#endif
1092
1093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1094 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1095 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1096 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1097
1098#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1099 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1100 }
1101 else
1102 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1103#endif
1104
1105 return off;
1106}
1107
1108
1109#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1110 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1111
1112/**
1113 * Emits code to check if a AVX exception (either \#UD or \#NM) should be raised.
1114 *
1115 * @returns New code buffer offset, UINT32_MAX on failure.
1116 * @param pReNative The native recompile state.
1117 * @param off The code buffer offset.
1118 * @param idxInstr The current instruction.
1119 */
1120DECL_INLINE_THROW(uint32_t)
1121iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1122{
1123#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1124 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1125
1126 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1127 {
1128#endif
1129 /*
1130 * Make sure we don't have any outstanding guest register writes as we may
1131 * raise an \#UD or \#NM and all guest register must be up to date in CPUMCTX.
1132 */
1133 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1134 off = iemNativeRegFlushPendingWrites(pReNative, off);
1135
1136#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1137 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1138#else
1139 RT_NOREF(idxInstr);
1140#endif
1141
1142 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1143 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1144 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1145 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1146 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1147 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1148
1149 /*
1150 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1151 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1152 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1153 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1154 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1155 * { likely }
1156 * else { goto RaiseAvxRelated; }
1157 */
1158#ifdef RT_ARCH_AMD64
1159 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1160 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1161 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1162 ^ 0x1a) ) { likely }
1163 else { goto RaiseAvxRelated; } */
1164 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1165 //pCodeBuf[off++] = 0xcc;
1166 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1167 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1168 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1169 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1170 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1171 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1172 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1173 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1174 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1175 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1176 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1177
1178#elif defined(RT_ARCH_ARM64)
1179 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1180 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1181 else { goto RaiseAvxRelated; } */
1182 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1183 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1184 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1185 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1186 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1187 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1188 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1189 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1190 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1191 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1192 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1193 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1194 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1195 idxLabelRaiseAvxRelated);
1196
1197#else
1198# error "Port me!"
1199#endif
1200
1201 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1202 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1203 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1204 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1205#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1206 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1207 }
1208 else
1209 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1210#endif
1211
1212 return off;
1213}
1214
1215
1216#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1217#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1218 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
1219
1220/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1221DECL_INLINE_THROW(uint32_t)
1222iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1223{
1224 /*
1225 * Make sure we don't have any outstanding guest register writes as we may
1226 * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
1227 */
1228 off = iemNativeRegFlushPendingWrites(pReNative, off);
1229
1230#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1231 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1232#else
1233 RT_NOREF(idxInstr);
1234#endif
1235
1236 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1237 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1238 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1239
1240 /* mov tmp, varmxcsr */
1241 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1242 /* tmp &= X86_MXCSR_XCPT_MASK */
1243 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1244 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1245 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1246 /* tmp = ~tmp */
1247 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1248 /* tmp &= mxcsr */
1249 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1250 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1251 idxLabelRaiseSseAvxFpRelated);
1252
1253 /* Free but don't flush the MXCSR register. */
1254 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1255 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1256
1257 return off;
1258}
1259#endif
1260
1261
1262#define IEM_MC_RAISE_DIVIDE_ERROR() \
1263 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1264
1265/**
1266 * Emits code to raise a \#DE.
1267 *
1268 * @returns New code buffer offset, UINT32_MAX on failure.
1269 * @param pReNative The native recompile state.
1270 * @param off The code buffer offset.
1271 * @param idxInstr The current instruction.
1272 */
1273DECL_INLINE_THROW(uint32_t)
1274iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1275{
1276 /*
1277 * Make sure we don't have any outstanding guest register writes as we may
1278 */
1279 off = iemNativeRegFlushPendingWrites(pReNative, off);
1280
1281#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1282 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1283#else
1284 RT_NOREF(idxInstr);
1285#endif
1286
1287 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1288
1289 /* raise \#DE exception unconditionally. */
1290 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1291
1292 return off;
1293}
1294
1295
1296#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
1297 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
1298
1299/**
1300 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
1301 *
1302 * @returns New code buffer offset, UINT32_MAX on failure.
1303 * @param pReNative The native recompile state.
1304 * @param off The code buffer offset.
1305 * @param idxInstr The current instruction.
1306 * @param idxVarEffAddr Index of the variable containing the effective address to check.
1307 * @param cbAlign The alignment in bytes to check against.
1308 */
1309DECL_INLINE_THROW(uint32_t)
1310iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
1311{
1312 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
1313 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
1314
1315 /*
1316 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
1317 */
1318 off = iemNativeRegFlushPendingWrites(pReNative, off);
1319
1320#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1321 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1322#else
1323 RT_NOREF(idxInstr);
1324#endif
1325
1326 uint8_t const idxLabelRaiseGp0 = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0);
1327 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
1328
1329 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxVarReg, cbAlign - 1, idxLabelRaiseGp0);
1330
1331 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
1332 return off;
1333}
1334
1335
1336/*********************************************************************************************************************************
1337* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1338*********************************************************************************************************************************/
1339
1340/**
1341 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1342 *
1343 * @returns Pointer to the condition stack entry on success, NULL on failure
1344 * (too many nestings)
1345 */
1346DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1347{
1348#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1349 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1350#endif
1351
1352 uint32_t const idxStack = pReNative->cCondDepth;
1353 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1354
1355 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1356 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1357
1358 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1359 pEntry->fInElse = false;
1360 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1361 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1362
1363 return pEntry;
1364}
1365
1366
1367/**
1368 * Start of the if-block, snapshotting the register and variable state.
1369 */
1370DECL_INLINE_THROW(void)
1371iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1372{
1373 Assert(offIfBlock != UINT32_MAX);
1374 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1375 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1376 Assert(!pEntry->fInElse);
1377
1378 /* Define the start of the IF block if request or for disassembly purposes. */
1379 if (idxLabelIf != UINT32_MAX)
1380 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1381#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1382 else
1383 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1384#else
1385 RT_NOREF(offIfBlock);
1386#endif
1387
1388#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1389 Assert(pReNative->Core.offPc == 0);
1390#endif
1391
1392 /* Copy the initial state so we can restore it in the 'else' block. */
1393 pEntry->InitialState = pReNative->Core;
1394}
1395
1396
1397#define IEM_MC_ELSE() } while (0); \
1398 off = iemNativeEmitElse(pReNative, off); \
1399 do {
1400
1401/** Emits code related to IEM_MC_ELSE. */
1402DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1403{
1404 /* Check sanity and get the conditional stack entry. */
1405 Assert(off != UINT32_MAX);
1406 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1407 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1408 Assert(!pEntry->fInElse);
1409
1410#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1411 /* Writeback any dirty shadow registers. */
1412 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1413 * in one of the branches and leave guest registers already dirty before the start of the if
1414 * block alone. */
1415 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1416#endif
1417
1418 /* Jump to the endif */
1419 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1420
1421 /* Define the else label and enter the else part of the condition. */
1422 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1423 pEntry->fInElse = true;
1424
1425#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1426 Assert(pReNative->Core.offPc == 0);
1427#endif
1428
1429 /* Snapshot the core state so we can do a merge at the endif and restore
1430 the snapshot we took at the start of the if-block. */
1431 pEntry->IfFinalState = pReNative->Core;
1432 pReNative->Core = pEntry->InitialState;
1433
1434 return off;
1435}
1436
1437
1438#define IEM_MC_ENDIF() } while (0); \
1439 off = iemNativeEmitEndIf(pReNative, off)
1440
1441/** Emits code related to IEM_MC_ENDIF. */
1442DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1443{
1444 /* Check sanity and get the conditional stack entry. */
1445 Assert(off != UINT32_MAX);
1446 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1447 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1448
1449#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1450 Assert(pReNative->Core.offPc == 0);
1451#endif
1452#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1453 /* Writeback any dirty shadow registers (else branch). */
1454 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1455 * in one of the branches and leave guest registers already dirty before the start of the if
1456 * block alone. */
1457 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1458#endif
1459
1460 /*
1461 * Now we have find common group with the core state at the end of the
1462 * if-final. Use the smallest common denominator and just drop anything
1463 * that isn't the same in both states.
1464 */
1465 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1466 * which is why we're doing this at the end of the else-block.
1467 * But we'd need more info about future for that to be worth the effort. */
1468 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1469#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1470 Assert( pOther->bmGstRegShadowDirty == 0
1471 && pReNative->Core.bmGstRegShadowDirty == 0);
1472#endif
1473
1474 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1475 {
1476 /* shadow guest stuff first. */
1477 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1478 if (fGstRegs)
1479 {
1480 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1481 do
1482 {
1483 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1484 fGstRegs &= ~RT_BIT_64(idxGstReg);
1485
1486 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1487 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1488 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1489 {
1490 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1491 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1492
1493#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1494 /* Writeback any dirty shadow registers we are about to unshadow. */
1495 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1496#endif
1497 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1498 }
1499 } while (fGstRegs);
1500 }
1501 else
1502 {
1503 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1504#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1505 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1506#endif
1507 }
1508
1509 /* Check variables next. For now we must require them to be identical
1510 or stuff we can recreate. */
1511 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1512 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1513 if (fVars)
1514 {
1515 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1516 do
1517 {
1518 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1519 fVars &= ~RT_BIT_32(idxVar);
1520
1521 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1522 {
1523 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1524 continue;
1525 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1526 {
1527 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1528 if (idxHstReg != UINT8_MAX)
1529 {
1530 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1531 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1532 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1533 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1534 }
1535 continue;
1536 }
1537 }
1538 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1539 continue;
1540
1541 /* Irreconcilable, so drop it. */
1542 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1543 if (idxHstReg != UINT8_MAX)
1544 {
1545 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1546 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1547 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1548 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1549 }
1550 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1551 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1552 } while (fVars);
1553 }
1554
1555 /* Finally, check that the host register allocations matches. */
1556 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1557 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1558 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1559 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1560 }
1561
1562 /*
1563 * Define the endif label and maybe the else one if we're still in the 'if' part.
1564 */
1565 if (!pEntry->fInElse)
1566 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1567 else
1568 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1569 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1570
1571 /* Pop the conditional stack.*/
1572 pReNative->cCondDepth -= 1;
1573
1574 return off;
1575}
1576
1577
1578#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1579 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1580 do {
1581
1582/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1583DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1584{
1585 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1586 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1587
1588 /* Get the eflags. */
1589 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1590 kIemNativeGstRegUse_ReadOnly);
1591
1592 /* Test and jump. */
1593 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1594
1595 /* Free but don't flush the EFlags register. */
1596 iemNativeRegFreeTmp(pReNative, idxEflReg);
1597
1598 /* Make a copy of the core state now as we start the if-block. */
1599 iemNativeCondStartIfBlock(pReNative, off);
1600
1601 return off;
1602}
1603
1604
1605#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1606 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1607 do {
1608
1609/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1610DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1611{
1612 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1613 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1614
1615 /* Get the eflags. */
1616 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1617 kIemNativeGstRegUse_ReadOnly);
1618
1619 /* Test and jump. */
1620 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1621
1622 /* Free but don't flush the EFlags register. */
1623 iemNativeRegFreeTmp(pReNative, idxEflReg);
1624
1625 /* Make a copy of the core state now as we start the if-block. */
1626 iemNativeCondStartIfBlock(pReNative, off);
1627
1628 return off;
1629}
1630
1631
1632#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1633 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1634 do {
1635
1636/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1637DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1638{
1639 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1640 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1641
1642 /* Get the eflags. */
1643 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1644 kIemNativeGstRegUse_ReadOnly);
1645
1646 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1647 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1648
1649 /* Test and jump. */
1650 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1651
1652 /* Free but don't flush the EFlags register. */
1653 iemNativeRegFreeTmp(pReNative, idxEflReg);
1654
1655 /* Make a copy of the core state now as we start the if-block. */
1656 iemNativeCondStartIfBlock(pReNative, off);
1657
1658 return off;
1659}
1660
1661
1662#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1663 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1664 do {
1665
1666/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1667DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1668{
1669 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1670 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1671
1672 /* Get the eflags. */
1673 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1674 kIemNativeGstRegUse_ReadOnly);
1675
1676 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1677 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1678
1679 /* Test and jump. */
1680 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1681
1682 /* Free but don't flush the EFlags register. */
1683 iemNativeRegFreeTmp(pReNative, idxEflReg);
1684
1685 /* Make a copy of the core state now as we start the if-block. */
1686 iemNativeCondStartIfBlock(pReNative, off);
1687
1688 return off;
1689}
1690
1691
1692#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1693 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1694 do {
1695
1696#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1697 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1698 do {
1699
1700/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1701DECL_INLINE_THROW(uint32_t)
1702iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1703 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1704{
1705 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1706 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1707
1708 /* Get the eflags. */
1709 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1710 kIemNativeGstRegUse_ReadOnly);
1711
1712 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1713 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1714
1715 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1716 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1717 Assert(iBitNo1 != iBitNo2);
1718
1719#ifdef RT_ARCH_AMD64
1720 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1721
1722 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1723 if (iBitNo1 > iBitNo2)
1724 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1725 else
1726 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1727 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1728
1729#elif defined(RT_ARCH_ARM64)
1730 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1731 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1732
1733 /* and tmpreg, eflreg, #1<<iBitNo1 */
1734 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1735
1736 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1737 if (iBitNo1 > iBitNo2)
1738 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1739 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1740 else
1741 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1742 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1743
1744 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1745
1746#else
1747# error "Port me"
1748#endif
1749
1750 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1751 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1752 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1753
1754 /* Free but don't flush the EFlags and tmp registers. */
1755 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1756 iemNativeRegFreeTmp(pReNative, idxEflReg);
1757
1758 /* Make a copy of the core state now as we start the if-block. */
1759 iemNativeCondStartIfBlock(pReNative, off);
1760
1761 return off;
1762}
1763
1764
1765#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1766 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1767 do {
1768
1769#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1770 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1771 do {
1772
1773/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1774 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1775DECL_INLINE_THROW(uint32_t)
1776iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1777 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1778{
1779 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1780 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1781
1782 /* We need an if-block label for the non-inverted variant. */
1783 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1784 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1785
1786 /* Get the eflags. */
1787 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1788 kIemNativeGstRegUse_ReadOnly);
1789
1790 /* Translate the flag masks to bit numbers. */
1791 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1792 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1793
1794 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1795 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1796 Assert(iBitNo1 != iBitNo);
1797
1798 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1799 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1800 Assert(iBitNo2 != iBitNo);
1801 Assert(iBitNo2 != iBitNo1);
1802
1803#ifdef RT_ARCH_AMD64
1804 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1805#elif defined(RT_ARCH_ARM64)
1806 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1807#endif
1808
1809 /* Check for the lone bit first. */
1810 if (!fInverted)
1811 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1812 else
1813 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1814
1815 /* Then extract and compare the other two bits. */
1816#ifdef RT_ARCH_AMD64
1817 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1818 if (iBitNo1 > iBitNo2)
1819 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1820 else
1821 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1822 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1823
1824#elif defined(RT_ARCH_ARM64)
1825 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1826
1827 /* and tmpreg, eflreg, #1<<iBitNo1 */
1828 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1829
1830 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1831 if (iBitNo1 > iBitNo2)
1832 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1833 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1834 else
1835 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1836 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1837
1838 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1839
1840#else
1841# error "Port me"
1842#endif
1843
1844 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1845 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1846 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1847
1848 /* Free but don't flush the EFlags and tmp registers. */
1849 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1850 iemNativeRegFreeTmp(pReNative, idxEflReg);
1851
1852 /* Make a copy of the core state now as we start the if-block. */
1853 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1854
1855 return off;
1856}
1857
1858
1859#define IEM_MC_IF_CX_IS_NZ() \
1860 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1861 do {
1862
1863/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1864DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1865{
1866 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1867
1868 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1869 kIemNativeGstRegUse_ReadOnly);
1870 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1871 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1872
1873 iemNativeCondStartIfBlock(pReNative, off);
1874 return off;
1875}
1876
1877
1878#define IEM_MC_IF_ECX_IS_NZ() \
1879 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1880 do {
1881
1882#define IEM_MC_IF_RCX_IS_NZ() \
1883 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1884 do {
1885
1886/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1887DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1888{
1889 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1890
1891 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1892 kIemNativeGstRegUse_ReadOnly);
1893 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1894 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1895
1896 iemNativeCondStartIfBlock(pReNative, off);
1897 return off;
1898}
1899
1900
1901#define IEM_MC_IF_CX_IS_NOT_ONE() \
1902 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1903 do {
1904
1905/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1906DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1907{
1908 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1909
1910 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1911 kIemNativeGstRegUse_ReadOnly);
1912#ifdef RT_ARCH_AMD64
1913 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1914#else
1915 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1916 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1917 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1918#endif
1919 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1920
1921 iemNativeCondStartIfBlock(pReNative, off);
1922 return off;
1923}
1924
1925
1926#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1927 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1928 do {
1929
1930#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1931 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1932 do {
1933
1934/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1935DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1936{
1937 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1938
1939 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1940 kIemNativeGstRegUse_ReadOnly);
1941 if (f64Bit)
1942 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1943 else
1944 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1945 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1946
1947 iemNativeCondStartIfBlock(pReNative, off);
1948 return off;
1949}
1950
1951
1952#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1953 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1954 do {
1955
1956#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1957 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1958 do {
1959
1960/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1961 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1962DECL_INLINE_THROW(uint32_t)
1963iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1964{
1965 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1966 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1967
1968 /* We have to load both RCX and EFLAGS before we can start branching,
1969 otherwise we'll end up in the else-block with an inconsistent
1970 register allocator state.
1971 Doing EFLAGS first as it's more likely to be loaded, right? */
1972 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1973 kIemNativeGstRegUse_ReadOnly);
1974 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1975 kIemNativeGstRegUse_ReadOnly);
1976
1977 /** @todo we could reduce this to a single branch instruction by spending a
1978 * temporary register and some setnz stuff. Not sure if loops are
1979 * worth it. */
1980 /* Check CX. */
1981#ifdef RT_ARCH_AMD64
1982 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1983#else
1984 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1985 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1986 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1987#endif
1988
1989 /* Check the EFlags bit. */
1990 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1991 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1992 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1993 !fCheckIfSet /*fJmpIfSet*/);
1994
1995 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1996 iemNativeRegFreeTmp(pReNative, idxEflReg);
1997
1998 iemNativeCondStartIfBlock(pReNative, off);
1999 return off;
2000}
2001
2002
2003#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2004 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
2005 do {
2006
2007#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2008 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
2009 do {
2010
2011#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2012 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
2013 do {
2014
2015#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2016 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
2017 do {
2018
2019/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
2020 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
2021 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
2022 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2023DECL_INLINE_THROW(uint32_t)
2024iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2025 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
2026{
2027 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2028 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2029
2030 /* We have to load both RCX and EFLAGS before we can start branching,
2031 otherwise we'll end up in the else-block with an inconsistent
2032 register allocator state.
2033 Doing EFLAGS first as it's more likely to be loaded, right? */
2034 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2035 kIemNativeGstRegUse_ReadOnly);
2036 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2037 kIemNativeGstRegUse_ReadOnly);
2038
2039 /** @todo we could reduce this to a single branch instruction by spending a
2040 * temporary register and some setnz stuff. Not sure if loops are
2041 * worth it. */
2042 /* Check RCX/ECX. */
2043 if (f64Bit)
2044 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2045 else
2046 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2047
2048 /* Check the EFlags bit. */
2049 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2050 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2051 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
2052 !fCheckIfSet /*fJmpIfSet*/);
2053
2054 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2055 iemNativeRegFreeTmp(pReNative, idxEflReg);
2056
2057 iemNativeCondStartIfBlock(pReNative, off);
2058 return off;
2059}
2060
2061
2062#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
2063 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
2064 do {
2065
2066/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
2067DECL_INLINE_THROW(uint32_t)
2068iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
2069{
2070 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2071
2072 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
2073 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
2074 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2075 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2076
2077 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
2078
2079 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
2080
2081 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
2082
2083 iemNativeCondStartIfBlock(pReNative, off);
2084 return off;
2085}
2086
2087
2088#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
2089 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
2090 do {
2091
2092/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
2093DECL_INLINE_THROW(uint32_t)
2094iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
2095{
2096 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2097 Assert(iGReg < 16);
2098
2099 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2100 kIemNativeGstRegUse_ReadOnly);
2101
2102 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2103
2104 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2105
2106 iemNativeCondStartIfBlock(pReNative, off);
2107 return off;
2108}
2109
2110
2111
2112/*********************************************************************************************************************************
2113* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2114*********************************************************************************************************************************/
2115
2116#define IEM_MC_NOREF(a_Name) \
2117 RT_NOREF_PV(a_Name)
2118
2119#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2120 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2121
2122#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2123 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2124
2125#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2126 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2127
2128#define IEM_MC_LOCAL(a_Type, a_Name) \
2129 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2130
2131#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2132 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2133
2134#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2135 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
2136
2137
2138/**
2139 * Sets the host register for @a idxVarRc to @a idxReg.
2140 *
2141 * The register must not be allocated. Any guest register shadowing will be
2142 * implictly dropped by this call.
2143 *
2144 * The variable must not have any register associated with it (causes
2145 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2146 * implied.
2147 *
2148 * @returns idxReg
2149 * @param pReNative The recompiler state.
2150 * @param idxVar The variable.
2151 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2152 * @param off For recording in debug info.
2153 *
2154 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2155 */
2156DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2157{
2158 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2159 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2160 Assert(!pVar->fRegAcquired);
2161 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2162 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2163 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2164
2165 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2166 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2167
2168 iemNativeVarSetKindToStack(pReNative, idxVar);
2169 pVar->idxReg = idxReg;
2170
2171 return idxReg;
2172}
2173
2174
2175/**
2176 * A convenient helper function.
2177 */
2178DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2179 uint8_t idxReg, uint32_t *poff)
2180{
2181 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2182 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2183 return idxReg;
2184}
2185
2186
2187/**
2188 * This is called by IEM_MC_END() to clean up all variables.
2189 */
2190DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2191{
2192 uint32_t const bmVars = pReNative->Core.bmVars;
2193 if (bmVars != 0)
2194 iemNativeVarFreeAllSlow(pReNative, bmVars);
2195 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2196 Assert(pReNative->Core.bmStack == 0);
2197}
2198
2199
2200#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2201
2202/**
2203 * This is called by IEM_MC_FREE_LOCAL.
2204 */
2205DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2206{
2207 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2208 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2209 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2210}
2211
2212
2213#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2214
2215/**
2216 * This is called by IEM_MC_FREE_ARG.
2217 */
2218DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2219{
2220 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2221 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2222 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2223}
2224
2225
2226#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2227
2228/**
2229 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2230 */
2231DECL_INLINE_THROW(uint32_t)
2232iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2233{
2234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2235 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2236 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2237 Assert( pVarDst->cbVar == sizeof(uint16_t)
2238 || pVarDst->cbVar == sizeof(uint32_t));
2239
2240 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2241 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2242 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2243 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2244 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2245
2246 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2247
2248 /*
2249 * Special case for immediates.
2250 */
2251 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2252 {
2253 switch (pVarDst->cbVar)
2254 {
2255 case sizeof(uint16_t):
2256 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2257 break;
2258 case sizeof(uint32_t):
2259 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2260 break;
2261 default: AssertFailed(); break;
2262 }
2263 }
2264 else
2265 {
2266 /*
2267 * The generic solution for now.
2268 */
2269 /** @todo optimize this by having the python script make sure the source
2270 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2271 * statement. Then we could just transfer the register assignments. */
2272 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2273 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2274 switch (pVarDst->cbVar)
2275 {
2276 case sizeof(uint16_t):
2277 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2278 break;
2279 case sizeof(uint32_t):
2280 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2281 break;
2282 default: AssertFailed(); break;
2283 }
2284 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2285 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2286 }
2287 return off;
2288}
2289
2290
2291
2292/*********************************************************************************************************************************
2293* Emitters for IEM_MC_CALL_CIMPL_XXX *
2294*********************************************************************************************************************************/
2295
2296/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2297DECL_INLINE_THROW(uint32_t)
2298iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2299 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2300
2301{
2302 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2303
2304#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2305 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2306 when a calls clobber any of the relevatn control registers. */
2307# if 1
2308 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2309 {
2310 /* Likely as long as call+ret are done via cimpl. */
2311 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2312 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2313 }
2314 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2315 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2316 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2317 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2318 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2319 else
2320 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2321 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2322 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2323
2324# else
2325 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2326 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2327 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2328 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2329 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2330 || pfnCImpl == (uintptr_t)iemCImpl_callf
2331 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2332 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2333 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2334 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2335 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2336# endif
2337#endif
2338
2339 /*
2340 * Do all the call setup and cleanup.
2341 */
2342 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2343
2344 /*
2345 * Load the two or three hidden arguments.
2346 */
2347#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2348 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2349 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2350 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2351#else
2352 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2353 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2354#endif
2355
2356 /*
2357 * Make the call and check the return code.
2358 *
2359 * Shadow PC copies are always flushed here, other stuff depends on flags.
2360 * Segment and general purpose registers are explictily flushed via the
2361 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2362 * macros.
2363 */
2364 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2365#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2366 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2367#endif
2368 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2369 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2370 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2371 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2372
2373 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2374}
2375
2376
2377#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2378 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2379
2380/** Emits code for IEM_MC_CALL_CIMPL_1. */
2381DECL_INLINE_THROW(uint32_t)
2382iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2383 uintptr_t pfnCImpl, uint8_t idxArg0)
2384{
2385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2386 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2387}
2388
2389
2390#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2391 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2392
2393/** Emits code for IEM_MC_CALL_CIMPL_2. */
2394DECL_INLINE_THROW(uint32_t)
2395iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2396 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2397{
2398 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2399 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2400 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2401}
2402
2403
2404#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2405 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2406 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2407
2408/** Emits code for IEM_MC_CALL_CIMPL_3. */
2409DECL_INLINE_THROW(uint32_t)
2410iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2411 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2412{
2413 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2414 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2415 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2416 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2417}
2418
2419
2420#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2421 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2422 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2423
2424/** Emits code for IEM_MC_CALL_CIMPL_4. */
2425DECL_INLINE_THROW(uint32_t)
2426iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2427 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2428{
2429 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2430 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2431 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2432 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2433 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2434}
2435
2436
2437#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2438 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2439 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2440
2441/** Emits code for IEM_MC_CALL_CIMPL_4. */
2442DECL_INLINE_THROW(uint32_t)
2443iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2444 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2445{
2446 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2447 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2448 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2449 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2450 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2451 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2452}
2453
2454
2455/** Recompiler debugging: Flush guest register shadow copies. */
2456#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2457
2458
2459
2460/*********************************************************************************************************************************
2461* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2462*********************************************************************************************************************************/
2463
2464/**
2465 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2466 */
2467DECL_INLINE_THROW(uint32_t)
2468iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2469 uintptr_t pfnAImpl, uint8_t cArgs)
2470{
2471 if (idxVarRc != UINT8_MAX)
2472 {
2473 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2474 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2475 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2476 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2477 }
2478
2479 /*
2480 * Do all the call setup and cleanup.
2481 *
2482 * It is only required to flush pending guest register writes in call volatile registers as
2483 * assembly helpers can't throw and don't access anything living in CPUMCTX, they only
2484 * access parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
2485 * no matter the fFlushPendingWrites parameter.
2486 */
2487 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
2488
2489 /*
2490 * Make the call and update the return code variable if we've got one.
2491 */
2492 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2493 if (idxVarRc != UINT8_MAX)
2494 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2495
2496 return off;
2497}
2498
2499
2500
2501#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2502 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2503
2504#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2505 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2506
2507/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2508DECL_INLINE_THROW(uint32_t)
2509iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2510{
2511 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2512}
2513
2514
2515#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2516 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2517
2518#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2519 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2520
2521/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2522DECL_INLINE_THROW(uint32_t)
2523iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2524{
2525 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2526 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2527}
2528
2529
2530#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2531 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2532
2533#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2534 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2535
2536/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2537DECL_INLINE_THROW(uint32_t)
2538iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2539 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2540{
2541 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2542 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2543 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2544}
2545
2546
2547#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2548 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2549
2550#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
2551 IEM_MC_LOCAL(a_rcType, a_rc); \
2552 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2553
2554/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2555DECL_INLINE_THROW(uint32_t)
2556iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2557 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2558{
2559 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2560 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2561 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2562 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2563}
2564
2565
2566#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2567 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2568
2569#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
2570 IEM_MC_LOCAL(a_rcType, a_rc); \
2571 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2572
2573/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2574DECL_INLINE_THROW(uint32_t)
2575iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2576 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2577{
2578 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2579 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2580 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2581 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2582 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2583}
2584
2585
2586
2587/*********************************************************************************************************************************
2588* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2589*********************************************************************************************************************************/
2590
2591#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2592 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2593
2594#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2595 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2596
2597#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2598 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2599
2600#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2601 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2602
2603
2604/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2605 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2606DECL_INLINE_THROW(uint32_t)
2607iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2608{
2609 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2610 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2611 Assert(iGRegEx < 20);
2612
2613 /* Same discussion as in iemNativeEmitFetchGregU16 */
2614 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2615 kIemNativeGstRegUse_ReadOnly);
2616
2617 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2618 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2619
2620 /* The value is zero-extended to the full 64-bit host register width. */
2621 if (iGRegEx < 16)
2622 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2623 else
2624 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2625
2626 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2627 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2628 return off;
2629}
2630
2631
2632#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2633 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2634
2635#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2636 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2637
2638#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2639 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2640
2641/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2642DECL_INLINE_THROW(uint32_t)
2643iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2644{
2645 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2646 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2647 Assert(iGRegEx < 20);
2648
2649 /* Same discussion as in iemNativeEmitFetchGregU16 */
2650 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2651 kIemNativeGstRegUse_ReadOnly);
2652
2653 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2654 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2655
2656 if (iGRegEx < 16)
2657 {
2658 switch (cbSignExtended)
2659 {
2660 case sizeof(uint16_t):
2661 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2662 break;
2663 case sizeof(uint32_t):
2664 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2665 break;
2666 case sizeof(uint64_t):
2667 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2668 break;
2669 default: AssertFailed(); break;
2670 }
2671 }
2672 else
2673 {
2674 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2675 switch (cbSignExtended)
2676 {
2677 case sizeof(uint16_t):
2678 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2679 break;
2680 case sizeof(uint32_t):
2681 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2682 break;
2683 case sizeof(uint64_t):
2684 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2685 break;
2686 default: AssertFailed(); break;
2687 }
2688 }
2689
2690 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2691 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2692 return off;
2693}
2694
2695
2696
2697#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2698 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2699
2700#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2701 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2702
2703#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2704 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2705
2706/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2707DECL_INLINE_THROW(uint32_t)
2708iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2709{
2710 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2711 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2712 Assert(iGReg < 16);
2713
2714 /*
2715 * We can either just load the low 16-bit of the GPR into a host register
2716 * for the variable, or we can do so via a shadow copy host register. The
2717 * latter will avoid having to reload it if it's being stored later, but
2718 * will waste a host register if it isn't touched again. Since we don't
2719 * know what going to happen, we choose the latter for now.
2720 */
2721 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2722 kIemNativeGstRegUse_ReadOnly);
2723
2724 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2725 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2726 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2727 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2728
2729 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2730 return off;
2731}
2732
2733
2734#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2735 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2736
2737#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2738 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2739
2740/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2741DECL_INLINE_THROW(uint32_t)
2742iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2743{
2744 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2745 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2746 Assert(iGReg < 16);
2747
2748 /*
2749 * We can either just load the low 16-bit of the GPR into a host register
2750 * for the variable, or we can do so via a shadow copy host register. The
2751 * latter will avoid having to reload it if it's being stored later, but
2752 * will waste a host register if it isn't touched again. Since we don't
2753 * know what going to happen, we choose the latter for now.
2754 */
2755 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2756 kIemNativeGstRegUse_ReadOnly);
2757
2758 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2759 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2760 if (cbSignExtended == sizeof(uint32_t))
2761 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2762 else
2763 {
2764 Assert(cbSignExtended == sizeof(uint64_t));
2765 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2766 }
2767 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2768
2769 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2770 return off;
2771}
2772
2773
2774#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2775 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2776
2777#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2778 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2779
2780/** Emits code for IEM_MC_FETCH_GREG_U32. */
2781DECL_INLINE_THROW(uint32_t)
2782iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2783{
2784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2786 Assert(iGReg < 16);
2787
2788 /*
2789 * We can either just load the low 16-bit of the GPR into a host register
2790 * for the variable, or we can do so via a shadow copy host register. The
2791 * latter will avoid having to reload it if it's being stored later, but
2792 * will waste a host register if it isn't touched again. Since we don't
2793 * know what going to happen, we choose the latter for now.
2794 */
2795 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2796 kIemNativeGstRegUse_ReadOnly);
2797
2798 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2800 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2801 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2802
2803 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2804 return off;
2805}
2806
2807
2808#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2809 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2810
2811/** Emits code for IEM_MC_FETCH_GREG_U32. */
2812DECL_INLINE_THROW(uint32_t)
2813iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2814{
2815 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2816 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2817 Assert(iGReg < 16);
2818
2819 /*
2820 * We can either just load the low 32-bit of the GPR into a host register
2821 * for the variable, or we can do so via a shadow copy host register. The
2822 * latter will avoid having to reload it if it's being stored later, but
2823 * will waste a host register if it isn't touched again. Since we don't
2824 * know what going to happen, we choose the latter for now.
2825 */
2826 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2827 kIemNativeGstRegUse_ReadOnly);
2828
2829 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2830 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2831 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2832 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2833
2834 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2835 return off;
2836}
2837
2838
2839#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2840 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2841
2842#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2843 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2844
2845/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2846 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2847DECL_INLINE_THROW(uint32_t)
2848iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2849{
2850 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2852 Assert(iGReg < 16);
2853
2854 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2855 kIemNativeGstRegUse_ReadOnly);
2856
2857 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2858 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2859 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2860 /** @todo name the register a shadow one already? */
2861 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2862
2863 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2864 return off;
2865}
2866
2867
2868#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2869#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
2870 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
2871
2872/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
2873DECL_INLINE_THROW(uint32_t)
2874iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
2875{
2876 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2877 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
2878 Assert(iGRegLo < 16 && iGRegHi < 16);
2879
2880 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
2881 kIemNativeGstRegUse_ReadOnly);
2882 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
2883 kIemNativeGstRegUse_ReadOnly);
2884
2885 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2886 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
2887 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
2888 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
2889
2890 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
2891 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
2892 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
2893 return off;
2894}
2895#endif
2896
2897
2898/*********************************************************************************************************************************
2899* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2900*********************************************************************************************************************************/
2901
2902#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2903 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2904
2905/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2906DECL_INLINE_THROW(uint32_t)
2907iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2908{
2909 Assert(iGRegEx < 20);
2910 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2911 kIemNativeGstRegUse_ForUpdate);
2912#ifdef RT_ARCH_AMD64
2913 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2914
2915 /* To the lowest byte of the register: mov r8, imm8 */
2916 if (iGRegEx < 16)
2917 {
2918 if (idxGstTmpReg >= 8)
2919 pbCodeBuf[off++] = X86_OP_REX_B;
2920 else if (idxGstTmpReg >= 4)
2921 pbCodeBuf[off++] = X86_OP_REX;
2922 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2923 pbCodeBuf[off++] = u8Value;
2924 }
2925 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
2926 else if (idxGstTmpReg < 4)
2927 {
2928 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2929 pbCodeBuf[off++] = u8Value;
2930 }
2931 else
2932 {
2933 /* ror reg64, 8 */
2934 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2935 pbCodeBuf[off++] = 0xc1;
2936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2937 pbCodeBuf[off++] = 8;
2938
2939 /* mov reg8, imm8 */
2940 if (idxGstTmpReg >= 8)
2941 pbCodeBuf[off++] = X86_OP_REX_B;
2942 else if (idxGstTmpReg >= 4)
2943 pbCodeBuf[off++] = X86_OP_REX;
2944 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2945 pbCodeBuf[off++] = u8Value;
2946
2947 /* rol reg64, 8 */
2948 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2949 pbCodeBuf[off++] = 0xc1;
2950 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2951 pbCodeBuf[off++] = 8;
2952 }
2953
2954#elif defined(RT_ARCH_ARM64)
2955 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2956 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2957 if (iGRegEx < 16)
2958 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2959 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2960 else
2961 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2962 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2963 iemNativeRegFreeTmp(pReNative, idxImmReg);
2964
2965#else
2966# error "Port me!"
2967#endif
2968
2969 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2970
2971#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2972 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2973#endif
2974
2975 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2976 return off;
2977}
2978
2979
2980#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2981 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2982
2983/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2984DECL_INLINE_THROW(uint32_t)
2985iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2986{
2987 Assert(iGRegEx < 20);
2988 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2989
2990 /*
2991 * If it's a constant value (unlikely) we treat this as a
2992 * IEM_MC_STORE_GREG_U8_CONST statement.
2993 */
2994 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2995 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2996 { /* likely */ }
2997 else
2998 {
2999 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3000 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3001 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
3002 }
3003
3004 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3005 kIemNativeGstRegUse_ForUpdate);
3006 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3007
3008#ifdef RT_ARCH_AMD64
3009 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
3010 if (iGRegEx < 16)
3011 {
3012 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3013 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3014 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3015 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3016 pbCodeBuf[off++] = X86_OP_REX;
3017 pbCodeBuf[off++] = 0x8a;
3018 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3019 }
3020 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
3021 else if (idxGstTmpReg < 4 && idxVarReg < 4)
3022 {
3023 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
3024 pbCodeBuf[off++] = 0x8a;
3025 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
3026 }
3027 else
3028 {
3029 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
3030
3031 /* ror reg64, 8 */
3032 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3033 pbCodeBuf[off++] = 0xc1;
3034 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3035 pbCodeBuf[off++] = 8;
3036
3037 /* mov reg8, reg8(r/m) */
3038 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
3039 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
3040 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
3041 pbCodeBuf[off++] = X86_OP_REX;
3042 pbCodeBuf[off++] = 0x8a;
3043 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
3044
3045 /* rol reg64, 8 */
3046 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3047 pbCodeBuf[off++] = 0xc1;
3048 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3049 pbCodeBuf[off++] = 8;
3050 }
3051
3052#elif defined(RT_ARCH_ARM64)
3053 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
3054 or
3055 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
3056 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3057 if (iGRegEx < 16)
3058 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
3059 else
3060 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
3061
3062#else
3063# error "Port me!"
3064#endif
3065 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3066
3067 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3068
3069#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3070 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
3071#endif
3072 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3073 return off;
3074}
3075
3076
3077
3078#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
3079 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
3080
3081/** Emits code for IEM_MC_STORE_GREG_U16. */
3082DECL_INLINE_THROW(uint32_t)
3083iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
3084{
3085 Assert(iGReg < 16);
3086 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3087 kIemNativeGstRegUse_ForUpdate);
3088#ifdef RT_ARCH_AMD64
3089 /* mov reg16, imm16 */
3090 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
3091 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3092 if (idxGstTmpReg >= 8)
3093 pbCodeBuf[off++] = X86_OP_REX_B;
3094 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
3095 pbCodeBuf[off++] = RT_BYTE1(uValue);
3096 pbCodeBuf[off++] = RT_BYTE2(uValue);
3097
3098#elif defined(RT_ARCH_ARM64)
3099 /* movk xdst, #uValue, lsl #0 */
3100 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3101 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3102
3103#else
3104# error "Port me!"
3105#endif
3106
3107 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3108
3109#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3110 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3111#endif
3112 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3113 return off;
3114}
3115
3116
3117#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3118 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3119
3120/** Emits code for IEM_MC_STORE_GREG_U16. */
3121DECL_INLINE_THROW(uint32_t)
3122iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3123{
3124 Assert(iGReg < 16);
3125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3126
3127 /*
3128 * If it's a constant value (unlikely) we treat this as a
3129 * IEM_MC_STORE_GREG_U16_CONST statement.
3130 */
3131 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3132 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3133 { /* likely */ }
3134 else
3135 {
3136 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3137 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3138 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3139 }
3140
3141 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3142 kIemNativeGstRegUse_ForUpdate);
3143
3144#ifdef RT_ARCH_AMD64
3145 /* mov reg16, reg16 or [mem16] */
3146 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3147 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3148 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3149 {
3150 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3151 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3152 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3153 pbCodeBuf[off++] = 0x8b;
3154 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3155 }
3156 else
3157 {
3158 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3159 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3160 if (idxGstTmpReg >= 8)
3161 pbCodeBuf[off++] = X86_OP_REX_R;
3162 pbCodeBuf[off++] = 0x8b;
3163 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3164 }
3165
3166#elif defined(RT_ARCH_ARM64)
3167 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3168 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3169 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3170 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3171 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3172
3173#else
3174# error "Port me!"
3175#endif
3176
3177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3178
3179#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3180 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3181#endif
3182 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3183 return off;
3184}
3185
3186
3187#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3188 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3189
3190/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3191DECL_INLINE_THROW(uint32_t)
3192iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3193{
3194 Assert(iGReg < 16);
3195 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3196 kIemNativeGstRegUse_ForFullWrite);
3197 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3198#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3199 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3200#endif
3201 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3202 return off;
3203}
3204
3205
3206#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3207 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3208
3209/** Emits code for IEM_MC_STORE_GREG_U32. */
3210DECL_INLINE_THROW(uint32_t)
3211iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3212{
3213 Assert(iGReg < 16);
3214 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3215
3216 /*
3217 * If it's a constant value (unlikely) we treat this as a
3218 * IEM_MC_STORE_GREG_U32_CONST statement.
3219 */
3220 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3221 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3222 { /* likely */ }
3223 else
3224 {
3225 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3226 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3227 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3228 }
3229
3230 /*
3231 * For the rest we allocate a guest register for the variable and writes
3232 * it to the CPUMCTX structure.
3233 */
3234 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3235#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3236 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3237#else
3238 RT_NOREF(idxVarReg);
3239#endif
3240#ifdef VBOX_STRICT
3241 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3242#endif
3243 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3244 return off;
3245}
3246
3247
3248#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3249 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3250
3251/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3252DECL_INLINE_THROW(uint32_t)
3253iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3254{
3255 Assert(iGReg < 16);
3256 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3257 kIemNativeGstRegUse_ForFullWrite);
3258 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3259#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3260 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3261#endif
3262 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3263 return off;
3264}
3265
3266
3267#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3268 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3269
3270#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3271 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3272
3273/** Emits code for IEM_MC_STORE_GREG_U64. */
3274DECL_INLINE_THROW(uint32_t)
3275iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3276{
3277 Assert(iGReg < 16);
3278 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3279
3280 /*
3281 * If it's a constant value (unlikely) we treat this as a
3282 * IEM_MC_STORE_GREG_U64_CONST statement.
3283 */
3284 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3285 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3286 { /* likely */ }
3287 else
3288 {
3289 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3290 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3291 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3292 }
3293
3294 /*
3295 * For the rest we allocate a guest register for the variable and writes
3296 * it to the CPUMCTX structure.
3297 */
3298 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3299#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3300 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3301#else
3302 RT_NOREF(idxVarReg);
3303#endif
3304 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3305 return off;
3306}
3307
3308
3309#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3310 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3311
3312/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3313DECL_INLINE_THROW(uint32_t)
3314iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3315{
3316 Assert(iGReg < 16);
3317 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3318 kIemNativeGstRegUse_ForUpdate);
3319 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3320#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3321 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3322#endif
3323 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3324 return off;
3325}
3326
3327
3328#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3329#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
3330 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
3331
3332/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3333DECL_INLINE_THROW(uint32_t)
3334iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
3335{
3336 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3337 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3338 Assert(iGRegLo < 16 && iGRegHi < 16);
3339
3340 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3341 kIemNativeGstRegUse_ForFullWrite);
3342 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3343 kIemNativeGstRegUse_ForFullWrite);
3344
3345 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3346 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
3347 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
3348 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
3349
3350 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3351 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3352 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3353 return off;
3354}
3355#endif
3356
3357
3358/*********************************************************************************************************************************
3359* General purpose register manipulation (add, sub). *
3360*********************************************************************************************************************************/
3361
3362#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3363 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3364
3365/** Emits code for IEM_MC_ADD_GREG_U16. */
3366DECL_INLINE_THROW(uint32_t)
3367iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3368{
3369 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3370 kIemNativeGstRegUse_ForUpdate);
3371
3372#ifdef RT_ARCH_AMD64
3373 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3374 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3375 if (idxGstTmpReg >= 8)
3376 pbCodeBuf[off++] = X86_OP_REX_B;
3377 if (uAddend == 1)
3378 {
3379 pbCodeBuf[off++] = 0xff; /* inc */
3380 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3381 }
3382 else
3383 {
3384 pbCodeBuf[off++] = 0x81;
3385 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3386 pbCodeBuf[off++] = uAddend;
3387 pbCodeBuf[off++] = 0;
3388 }
3389
3390#else
3391 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3392 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3393
3394 /* sub tmp, gstgrp, uAddend */
3395 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3396
3397 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
3398 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3399
3400 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3401#endif
3402
3403 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3404
3405#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3406 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3407#endif
3408
3409 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3410 return off;
3411}
3412
3413
3414#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3415 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3416
3417#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3418 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3419
3420/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3421DECL_INLINE_THROW(uint32_t)
3422iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3423{
3424 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3425 kIemNativeGstRegUse_ForUpdate);
3426
3427#ifdef RT_ARCH_AMD64
3428 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3429 if (f64Bit)
3430 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3431 else if (idxGstTmpReg >= 8)
3432 pbCodeBuf[off++] = X86_OP_REX_B;
3433 if (uAddend == 1)
3434 {
3435 pbCodeBuf[off++] = 0xff; /* inc */
3436 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3437 }
3438 else if (uAddend < 128)
3439 {
3440 pbCodeBuf[off++] = 0x83; /* add */
3441 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3442 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3443 }
3444 else
3445 {
3446 pbCodeBuf[off++] = 0x81; /* add */
3447 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3448 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3449 pbCodeBuf[off++] = 0;
3450 pbCodeBuf[off++] = 0;
3451 pbCodeBuf[off++] = 0;
3452 }
3453
3454#else
3455 /* sub tmp, gstgrp, uAddend */
3456 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3457 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3458
3459#endif
3460
3461 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3462
3463#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3464 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3465#endif
3466
3467 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3468 return off;
3469}
3470
3471
3472
3473#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3474 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3475
3476/** Emits code for IEM_MC_SUB_GREG_U16. */
3477DECL_INLINE_THROW(uint32_t)
3478iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3479{
3480 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3481 kIemNativeGstRegUse_ForUpdate);
3482
3483#ifdef RT_ARCH_AMD64
3484 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3485 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3486 if (idxGstTmpReg >= 8)
3487 pbCodeBuf[off++] = X86_OP_REX_B;
3488 if (uSubtrahend == 1)
3489 {
3490 pbCodeBuf[off++] = 0xff; /* dec */
3491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3492 }
3493 else
3494 {
3495 pbCodeBuf[off++] = 0x81;
3496 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3497 pbCodeBuf[off++] = uSubtrahend;
3498 pbCodeBuf[off++] = 0;
3499 }
3500
3501#else
3502 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3503 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3504
3505 /* sub tmp, gstgrp, uSubtrahend */
3506 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3507
3508 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
3509 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3510
3511 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3512#endif
3513
3514 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3515
3516#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3518#endif
3519
3520 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3521 return off;
3522}
3523
3524
3525#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3526 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3527
3528#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3529 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3530
3531/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3532DECL_INLINE_THROW(uint32_t)
3533iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3534{
3535 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3536 kIemNativeGstRegUse_ForUpdate);
3537
3538#ifdef RT_ARCH_AMD64
3539 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3540 if (f64Bit)
3541 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3542 else if (idxGstTmpReg >= 8)
3543 pbCodeBuf[off++] = X86_OP_REX_B;
3544 if (uSubtrahend == 1)
3545 {
3546 pbCodeBuf[off++] = 0xff; /* dec */
3547 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3548 }
3549 else if (uSubtrahend < 128)
3550 {
3551 pbCodeBuf[off++] = 0x83; /* sub */
3552 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3553 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3554 }
3555 else
3556 {
3557 pbCodeBuf[off++] = 0x81; /* sub */
3558 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3559 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3560 pbCodeBuf[off++] = 0;
3561 pbCodeBuf[off++] = 0;
3562 pbCodeBuf[off++] = 0;
3563 }
3564
3565#else
3566 /* sub tmp, gstgrp, uSubtrahend */
3567 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3568 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3569
3570#endif
3571
3572 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3573
3574#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3575 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3576#endif
3577
3578 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3579 return off;
3580}
3581
3582
3583#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3584 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3585
3586#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3587 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3588
3589#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3590 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3591
3592#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3593 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3594
3595/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3596DECL_INLINE_THROW(uint32_t)
3597iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3598{
3599#ifdef VBOX_STRICT
3600 switch (cbMask)
3601 {
3602 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3603 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3604 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3605 case sizeof(uint64_t): break;
3606 default: AssertFailedBreak();
3607 }
3608#endif
3609
3610 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3611 kIemNativeGstRegUse_ForUpdate);
3612
3613 switch (cbMask)
3614 {
3615 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3616 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3617 break;
3618 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3619 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3620 break;
3621 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3622 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3623 break;
3624 case sizeof(uint64_t):
3625 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3626 break;
3627 default: AssertFailedBreak();
3628 }
3629
3630 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3631
3632#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3633 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3634#endif
3635
3636 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3637 return off;
3638}
3639
3640
3641#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3642 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3643
3644#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3645 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3646
3647#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3648 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3649
3650#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3651 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3652
3653/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3654DECL_INLINE_THROW(uint32_t)
3655iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3656{
3657#ifdef VBOX_STRICT
3658 switch (cbMask)
3659 {
3660 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3661 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3662 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3663 case sizeof(uint64_t): break;
3664 default: AssertFailedBreak();
3665 }
3666#endif
3667
3668 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3669 kIemNativeGstRegUse_ForUpdate);
3670
3671 switch (cbMask)
3672 {
3673 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3674 case sizeof(uint16_t):
3675 case sizeof(uint64_t):
3676 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3677 break;
3678 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3679 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3680 break;
3681 default: AssertFailedBreak();
3682 }
3683
3684 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3685
3686#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3687 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3688#endif
3689
3690 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3691 return off;
3692}
3693
3694
3695/*********************************************************************************************************************************
3696* Local/Argument variable manipulation (add, sub, and, or). *
3697*********************************************************************************************************************************/
3698
3699#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3700 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3701
3702#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3703 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3704
3705#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3706 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3707
3708#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3709 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3710
3711
3712#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3713 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3714
3715#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3716 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3717
3718#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3719 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3720
3721/** Emits code for AND'ing a local and a constant value. */
3722DECL_INLINE_THROW(uint32_t)
3723iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3724{
3725#ifdef VBOX_STRICT
3726 switch (cbMask)
3727 {
3728 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3729 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3730 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3731 case sizeof(uint64_t): break;
3732 default: AssertFailedBreak();
3733 }
3734#endif
3735
3736 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3737 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3738
3739 if (cbMask <= sizeof(uint32_t))
3740 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3741 else
3742 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3743
3744 iemNativeVarRegisterRelease(pReNative, idxVar);
3745 return off;
3746}
3747
3748
3749#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3750 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3751
3752#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3753 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3754
3755#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3756 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3757
3758#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3759 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3760
3761/** Emits code for OR'ing a local and a constant value. */
3762DECL_INLINE_THROW(uint32_t)
3763iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3764{
3765#ifdef VBOX_STRICT
3766 switch (cbMask)
3767 {
3768 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3769 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3770 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3771 case sizeof(uint64_t): break;
3772 default: AssertFailedBreak();
3773 }
3774#endif
3775
3776 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3777 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3778
3779 if (cbMask <= sizeof(uint32_t))
3780 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3781 else
3782 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3783
3784 iemNativeVarRegisterRelease(pReNative, idxVar);
3785 return off;
3786}
3787
3788
3789#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3790 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3791
3792#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3793 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3794
3795#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3796 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3797
3798/** Emits code for reversing the byte order in a local value. */
3799DECL_INLINE_THROW(uint32_t)
3800iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3801{
3802 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3803 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3804
3805 switch (cbLocal)
3806 {
3807 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3808 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3809 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3810 default: AssertFailedBreak();
3811 }
3812
3813 iemNativeVarRegisterRelease(pReNative, idxVar);
3814 return off;
3815}
3816
3817
3818#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3819 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3820
3821#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3822 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3823
3824#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3825 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3826
3827/** Emits code for shifting left a local value. */
3828DECL_INLINE_THROW(uint32_t)
3829iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3830{
3831#ifdef VBOX_STRICT
3832 switch (cbLocal)
3833 {
3834 case sizeof(uint8_t): Assert(cShift < 8); break;
3835 case sizeof(uint16_t): Assert(cShift < 16); break;
3836 case sizeof(uint32_t): Assert(cShift < 32); break;
3837 case sizeof(uint64_t): Assert(cShift < 64); break;
3838 default: AssertFailedBreak();
3839 }
3840#endif
3841
3842 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3843 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3844
3845 if (cbLocal <= sizeof(uint32_t))
3846 {
3847 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3848 if (cbLocal < sizeof(uint32_t))
3849 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3850 cbLocal == sizeof(uint16_t)
3851 ? UINT32_C(0xffff)
3852 : UINT32_C(0xff));
3853 }
3854 else
3855 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3856
3857 iemNativeVarRegisterRelease(pReNative, idxVar);
3858 return off;
3859}
3860
3861
3862#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3863 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3864
3865#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3866 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3867
3868#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3869 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3870
3871/** Emits code for shifting left a local value. */
3872DECL_INLINE_THROW(uint32_t)
3873iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3874{
3875#ifdef VBOX_STRICT
3876 switch (cbLocal)
3877 {
3878 case sizeof(int8_t): Assert(cShift < 8); break;
3879 case sizeof(int16_t): Assert(cShift < 16); break;
3880 case sizeof(int32_t): Assert(cShift < 32); break;
3881 case sizeof(int64_t): Assert(cShift < 64); break;
3882 default: AssertFailedBreak();
3883 }
3884#endif
3885
3886 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3887 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3888
3889 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3890 if (cbLocal == sizeof(uint8_t))
3891 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3892 else if (cbLocal == sizeof(uint16_t))
3893 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3894
3895 if (cbLocal <= sizeof(uint32_t))
3896 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3897 else
3898 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3899
3900 iemNativeVarRegisterRelease(pReNative, idxVar);
3901 return off;
3902}
3903
3904
3905#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3906 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3907
3908#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3909 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3910
3911#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3912 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3913
3914/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3915DECL_INLINE_THROW(uint32_t)
3916iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3917{
3918 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3919 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3920 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3921 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3922
3923 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3924 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3925
3926 /* Need to sign extend the value. */
3927 if (cbLocal <= sizeof(uint32_t))
3928 {
3929/** @todo ARM64: In case of boredone, the extended add instruction can do the
3930 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3931 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3932
3933 switch (cbLocal)
3934 {
3935 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3936 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3937 default: AssertFailed();
3938 }
3939
3940 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3941 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3942 }
3943 else
3944 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3945
3946 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3947 iemNativeVarRegisterRelease(pReNative, idxVar);
3948 return off;
3949}
3950
3951
3952
3953/*********************************************************************************************************************************
3954* EFLAGS *
3955*********************************************************************************************************************************/
3956
3957#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3958# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3959#else
3960# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3961 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3962
3963DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3964{
3965 if (fEflOutput)
3966 {
3967 PVMCPUCC const pVCpu = pReNative->pVCpu;
3968# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3969 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3970 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3971 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3972# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3973 if (fEflOutput & (a_fEfl)) \
3974 { \
3975 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3976 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3977 else \
3978 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3979 } else do { } while (0)
3980# else
3981 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3982 IEMLIVENESSBIT const LivenessClobbered =
3983 {
3984 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3985 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3986 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3987 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3988 };
3989 IEMLIVENESSBIT const LivenessDelayable =
3990 {
3991 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3992 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3993 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3994 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3995 };
3996# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3997 if (fEflOutput & (a_fEfl)) \
3998 { \
3999 if (LivenessClobbered.a_fLivenessMember) \
4000 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
4001 else if (LivenessDelayable.a_fLivenessMember) \
4002 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
4003 else \
4004 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
4005 } else do { } while (0)
4006# endif
4007 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
4008 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
4009 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
4010 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
4011 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
4012 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
4013 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
4014# undef CHECK_FLAG_AND_UPDATE_STATS
4015 }
4016 RT_NOREF(fEflInput);
4017}
4018#endif /* VBOX_WITH_STATISTICS */
4019
4020#undef IEM_MC_FETCH_EFLAGS /* should not be used */
4021#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4022 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
4023
4024/** Handles IEM_MC_FETCH_EFLAGS_EX. */
4025DECL_INLINE_THROW(uint32_t)
4026iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
4027 uint32_t fEflInput, uint32_t fEflOutput)
4028{
4029 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
4030 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4031 RT_NOREF(fEflInput, fEflOutput);
4032
4033#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4034# ifdef VBOX_STRICT
4035 if ( pReNative->idxCurCall != 0
4036 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
4037 {
4038 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
4039 uint32_t const fBoth = fEflInput | fEflOutput;
4040# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
4041 AssertMsg( !(fBoth & (a_fElfConst)) \
4042 || (!(fEflInput & (a_fElfConst)) \
4043 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4044 : !(fEflOutput & (a_fElfConst)) \
4045 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
4046 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
4047 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
4048 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
4049 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
4050 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
4051 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
4052 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
4053 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
4054 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
4055# undef ASSERT_ONE_EFL
4056 }
4057# endif
4058#endif
4059
4060 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4061
4062 /** @todo this is suboptimial. EFLAGS is probably shadowed and we should use
4063 * the existing shadow copy. */
4064 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
4065 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4066 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
4067 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4068 return off;
4069}
4070
4071
4072
4073/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
4074 * start using it with custom native code emission (inlining assembly
4075 * instruction helpers). */
4076#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
4077#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4078 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4079 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
4080
4081#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
4082#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4083 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4084 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
4085
4086/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
4087DECL_INLINE_THROW(uint32_t)
4088iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
4089 bool fUpdateSkipping)
4090{
4091 RT_NOREF(fEflOutput);
4092 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
4093 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4094
4095#ifdef VBOX_STRICT
4096 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
4097 uint32_t offFixup = off;
4098 off = iemNativeEmitJnzToFixed(pReNative, off, off);
4099 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
4100 iemNativeFixupFixedJump(pReNative, offFixup, off);
4101
4102 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
4103 offFixup = off;
4104 off = iemNativeEmitJzToFixed(pReNative, off, off);
4105 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
4106 iemNativeFixupFixedJump(pReNative, offFixup, off);
4107
4108 /** @todo validate that only bits in the fElfOutput mask changed. */
4109#endif
4110
4111#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4112 if (fUpdateSkipping)
4113 {
4114 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4115 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4116 else
4117 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4118 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4119 }
4120#else
4121 RT_NOREF_PV(fUpdateSkipping);
4122#endif
4123
4124 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4125 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
4126 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4127 return off;
4128}
4129
4130
4131typedef enum IEMNATIVEMITEFLOP
4132{
4133 kIemNativeEmitEflOp_Invalid = 0,
4134 kIemNativeEmitEflOp_Set,
4135 kIemNativeEmitEflOp_Clear,
4136 kIemNativeEmitEflOp_Flip
4137} IEMNATIVEMITEFLOP;
4138
4139#define IEM_MC_SET_EFL_BIT(a_fBit) \
4140 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
4141
4142#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
4143 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
4144
4145#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
4146 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
4147
4148/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
4149DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
4150{
4151 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4152 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
4153
4154 switch (enmOp)
4155 {
4156 case kIemNativeEmitEflOp_Set:
4157 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4158 break;
4159 case kIemNativeEmitEflOp_Clear:
4160 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
4161 break;
4162 case kIemNativeEmitEflOp_Flip:
4163 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4164 break;
4165 default:
4166 AssertFailed();
4167 break;
4168 }
4169
4170 /** @todo No delayed writeback for EFLAGS right now. */
4171 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4172
4173 /* Free but don't flush the EFLAGS register. */
4174 iemNativeRegFreeTmp(pReNative, idxEflReg);
4175
4176 return off;
4177}
4178
4179
4180/*********************************************************************************************************************************
4181* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4182*********************************************************************************************************************************/
4183
4184#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4185 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4186
4187#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4188 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4189
4190#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4191 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4192
4193
4194/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4195 * IEM_MC_FETCH_SREG_ZX_U64. */
4196DECL_INLINE_THROW(uint32_t)
4197iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4198{
4199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4200 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4201 Assert(iSReg < X86_SREG_COUNT);
4202
4203 /*
4204 * For now, we will not create a shadow copy of a selector. The rational
4205 * is that since we do not recompile the popping and loading of segment
4206 * registers and that the the IEM_MC_FETCH_SREG_U* MCs are only used for
4207 * pushing and moving to registers, there is only a small chance that the
4208 * shadow copy will be accessed again before the register is reloaded. One
4209 * scenario would be nested called in 16-bit code, but I doubt it's worth
4210 * the extra register pressure atm.
4211 *
4212 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4213 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
4214 * store scencario covered at present (r160730).
4215 */
4216 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4217 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4218 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4219 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4220 return off;
4221}
4222
4223
4224
4225/*********************************************************************************************************************************
4226* Register references. *
4227*********************************************************************************************************************************/
4228
4229#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4230 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4231
4232#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4233 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4234
4235/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4236DECL_INLINE_THROW(uint32_t)
4237iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4238{
4239 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4240 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4241 Assert(iGRegEx < 20);
4242
4243 if (iGRegEx < 16)
4244 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4245 else
4246 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4247
4248 /* If we've delayed writing back the register value, flush it now. */
4249 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4250
4251 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4252 if (!fConst)
4253 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4254
4255 return off;
4256}
4257
4258#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4259 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4260
4261#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4262 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4263
4264#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4265 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4266
4267#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4268 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4269
4270#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4271 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4272
4273#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4274 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4275
4276#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4277 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4278
4279#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4280 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4281
4282#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4283 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4284
4285#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4286 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4287
4288/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4289DECL_INLINE_THROW(uint32_t)
4290iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4291{
4292 Assert(iGReg < 16);
4293 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4294 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4295
4296 /* If we've delayed writing back the register value, flush it now. */
4297 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4298
4299 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4300 if (!fConst)
4301 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4302
4303 return off;
4304}
4305
4306
4307#undef IEM_MC_REF_EFLAGS /* should not be used. */
4308#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4309 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4310 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4311
4312/** Handles IEM_MC_REF_EFLAGS. */
4313DECL_INLINE_THROW(uint32_t)
4314iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4315{
4316 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4317 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4318
4319#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4320 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4321
4322 /* Updating the skipping according to the outputs is a little early, but
4323 we don't have any other hooks for references atm. */
4324 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4325 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4326 else if (fEflOutput & X86_EFL_STATUS_BITS)
4327 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4328 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4329#else
4330 RT_NOREF(fEflInput, fEflOutput);
4331#endif
4332
4333 /* If we've delayed writing back the register value, flush it now. */
4334 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4335
4336 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4337 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4338
4339 return off;
4340}
4341
4342
4343/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4344 * different code from threaded recompiler, maybe it would be helpful. For now
4345 * we assume the threaded recompiler catches any incorrect EFLAGS delcarations. */
4346#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4347
4348
4349#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4350 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4351
4352#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4353 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4354
4355#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4356 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4357
4358#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4359/* Just being paranoid here. */
4360# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
4361AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4362AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4363AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4364AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4365# endif
4366AssertCompileMemberOffset(X86XMMREG, au64, 0);
4367AssertCompileMemberOffset(X86XMMREG, au32, 0);
4368AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4369AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4370
4371# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4372 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4373# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4374 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4375# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4376 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4377# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4378 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4379#endif
4380
4381/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4382DECL_INLINE_THROW(uint32_t)
4383iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4384{
4385 Assert(iXReg < 16);
4386 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4387 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4388
4389 /* If we've delayed writing back the register value, flush it now. */
4390 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4391
4392#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4393 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4394 if (!fConst)
4395 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4396#else
4397 RT_NOREF(fConst);
4398#endif
4399
4400 return off;
4401}
4402
4403
4404
4405/*********************************************************************************************************************************
4406* Effective Address Calculation *
4407*********************************************************************************************************************************/
4408#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4409 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4410
4411/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4412 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4413DECL_INLINE_THROW(uint32_t)
4414iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4415 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4416{
4417 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4418
4419 /*
4420 * Handle the disp16 form with no registers first.
4421 *
4422 * Convert to an immediate value, as that'll delay the register allocation
4423 * and assignment till the memory access / call / whatever and we can use
4424 * a more appropriate register (or none at all).
4425 */
4426 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4427 {
4428 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4429 return off;
4430 }
4431
4432 /* Determin the displacment. */
4433 uint16_t u16EffAddr;
4434 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4435 {
4436 case 0: u16EffAddr = 0; break;
4437 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4438 case 2: u16EffAddr = u16Disp; break;
4439 default: AssertFailedStmt(u16EffAddr = 0);
4440 }
4441
4442 /* Determine the registers involved. */
4443 uint8_t idxGstRegBase;
4444 uint8_t idxGstRegIndex;
4445 switch (bRm & X86_MODRM_RM_MASK)
4446 {
4447 case 0:
4448 idxGstRegBase = X86_GREG_xBX;
4449 idxGstRegIndex = X86_GREG_xSI;
4450 break;
4451 case 1:
4452 idxGstRegBase = X86_GREG_xBX;
4453 idxGstRegIndex = X86_GREG_xDI;
4454 break;
4455 case 2:
4456 idxGstRegBase = X86_GREG_xBP;
4457 idxGstRegIndex = X86_GREG_xSI;
4458 break;
4459 case 3:
4460 idxGstRegBase = X86_GREG_xBP;
4461 idxGstRegIndex = X86_GREG_xDI;
4462 break;
4463 case 4:
4464 idxGstRegBase = X86_GREG_xSI;
4465 idxGstRegIndex = UINT8_MAX;
4466 break;
4467 case 5:
4468 idxGstRegBase = X86_GREG_xDI;
4469 idxGstRegIndex = UINT8_MAX;
4470 break;
4471 case 6:
4472 idxGstRegBase = X86_GREG_xBP;
4473 idxGstRegIndex = UINT8_MAX;
4474 break;
4475#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4476 default:
4477#endif
4478 case 7:
4479 idxGstRegBase = X86_GREG_xBX;
4480 idxGstRegIndex = UINT8_MAX;
4481 break;
4482 }
4483
4484 /*
4485 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4486 */
4487 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4488 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4489 kIemNativeGstRegUse_ReadOnly);
4490 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4491 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4492 kIemNativeGstRegUse_ReadOnly)
4493 : UINT8_MAX;
4494#ifdef RT_ARCH_AMD64
4495 if (idxRegIndex == UINT8_MAX)
4496 {
4497 if (u16EffAddr == 0)
4498 {
4499 /* movxz ret, base */
4500 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4501 }
4502 else
4503 {
4504 /* lea ret32, [base64 + disp32] */
4505 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4506 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4507 if (idxRegRet >= 8 || idxRegBase >= 8)
4508 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4509 pbCodeBuf[off++] = 0x8d;
4510 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4511 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4512 else
4513 {
4514 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4515 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4516 }
4517 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4518 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4519 pbCodeBuf[off++] = 0;
4520 pbCodeBuf[off++] = 0;
4521 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4522
4523 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4524 }
4525 }
4526 else
4527 {
4528 /* lea ret32, [index64 + base64 (+ disp32)] */
4529 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4530 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4531 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4532 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4533 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4534 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4535 pbCodeBuf[off++] = 0x8d;
4536 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4537 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4538 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4539 if (bMod == X86_MOD_MEM4)
4540 {
4541 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4542 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4543 pbCodeBuf[off++] = 0;
4544 pbCodeBuf[off++] = 0;
4545 }
4546 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4547 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4548 }
4549
4550#elif defined(RT_ARCH_ARM64)
4551 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4552 if (u16EffAddr == 0)
4553 {
4554 if (idxRegIndex == UINT8_MAX)
4555 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4556 else
4557 {
4558 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4559 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4560 }
4561 }
4562 else
4563 {
4564 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4565 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4566 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4567 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4568 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4569 else
4570 {
4571 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4572 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4573 }
4574 if (idxRegIndex != UINT8_MAX)
4575 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4576 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4577 }
4578
4579#else
4580# error "port me"
4581#endif
4582
4583 if (idxRegIndex != UINT8_MAX)
4584 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4585 iemNativeRegFreeTmp(pReNative, idxRegBase);
4586 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4587 return off;
4588}
4589
4590
4591#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4592 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4593
4594/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4595 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4596DECL_INLINE_THROW(uint32_t)
4597iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4598 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4599{
4600 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4601
4602 /*
4603 * Handle the disp32 form with no registers first.
4604 *
4605 * Convert to an immediate value, as that'll delay the register allocation
4606 * and assignment till the memory access / call / whatever and we can use
4607 * a more appropriate register (or none at all).
4608 */
4609 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4610 {
4611 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4612 return off;
4613 }
4614
4615 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
4616 uint32_t u32EffAddr = 0;
4617 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4618 {
4619 case 0: break;
4620 case 1: u32EffAddr = (int8_t)u32Disp; break;
4621 case 2: u32EffAddr = u32Disp; break;
4622 default: AssertFailed();
4623 }
4624
4625 /* Get the register (or SIB) value. */
4626 uint8_t idxGstRegBase = UINT8_MAX;
4627 uint8_t idxGstRegIndex = UINT8_MAX;
4628 uint8_t cShiftIndex = 0;
4629 switch (bRm & X86_MODRM_RM_MASK)
4630 {
4631 case 0: idxGstRegBase = X86_GREG_xAX; break;
4632 case 1: idxGstRegBase = X86_GREG_xCX; break;
4633 case 2: idxGstRegBase = X86_GREG_xDX; break;
4634 case 3: idxGstRegBase = X86_GREG_xBX; break;
4635 case 4: /* SIB */
4636 {
4637 /* index /w scaling . */
4638 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4639 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4640 {
4641 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4642 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4643 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4644 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4645 case 4: cShiftIndex = 0; /*no index*/ break;
4646 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4647 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4648 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4649 }
4650
4651 /* base */
4652 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4653 {
4654 case 0: idxGstRegBase = X86_GREG_xAX; break;
4655 case 1: idxGstRegBase = X86_GREG_xCX; break;
4656 case 2: idxGstRegBase = X86_GREG_xDX; break;
4657 case 3: idxGstRegBase = X86_GREG_xBX; break;
4658 case 4:
4659 idxGstRegBase = X86_GREG_xSP;
4660 u32EffAddr += uSibAndRspOffset >> 8;
4661 break;
4662 case 5:
4663 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4664 idxGstRegBase = X86_GREG_xBP;
4665 else
4666 {
4667 Assert(u32EffAddr == 0);
4668 u32EffAddr = u32Disp;
4669 }
4670 break;
4671 case 6: idxGstRegBase = X86_GREG_xSI; break;
4672 case 7: idxGstRegBase = X86_GREG_xDI; break;
4673 }
4674 break;
4675 }
4676 case 5: idxGstRegBase = X86_GREG_xBP; break;
4677 case 6: idxGstRegBase = X86_GREG_xSI; break;
4678 case 7: idxGstRegBase = X86_GREG_xDI; break;
4679 }
4680
4681 /*
4682 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4683 * the start of the function.
4684 */
4685 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4686 {
4687 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4688 return off;
4689 }
4690
4691 /*
4692 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4693 */
4694 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4695 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4696 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4697 kIemNativeGstRegUse_ReadOnly);
4698 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4699 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4700 kIemNativeGstRegUse_ReadOnly);
4701
4702 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4703 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4704 {
4705 idxRegBase = idxRegIndex;
4706 idxRegIndex = UINT8_MAX;
4707 }
4708
4709#ifdef RT_ARCH_AMD64
4710 if (idxRegIndex == UINT8_MAX)
4711 {
4712 if (u32EffAddr == 0)
4713 {
4714 /* mov ret, base */
4715 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4716 }
4717 else
4718 {
4719 /* lea ret32, [base64 + disp32] */
4720 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4721 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4722 if (idxRegRet >= 8 || idxRegBase >= 8)
4723 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4724 pbCodeBuf[off++] = 0x8d;
4725 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4726 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4727 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4728 else
4729 {
4730 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4731 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4732 }
4733 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4734 if (bMod == X86_MOD_MEM4)
4735 {
4736 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4737 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4738 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4739 }
4740 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4741 }
4742 }
4743 else
4744 {
4745 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4746 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4747 if (idxRegBase == UINT8_MAX)
4748 {
4749 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4750 if (idxRegRet >= 8 || idxRegIndex >= 8)
4751 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4752 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4753 pbCodeBuf[off++] = 0x8d;
4754 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4755 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4756 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4757 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4758 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4759 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4760 }
4761 else
4762 {
4763 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4764 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4765 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4766 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4767 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4768 pbCodeBuf[off++] = 0x8d;
4769 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4770 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4771 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4772 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4773 if (bMod != X86_MOD_MEM0)
4774 {
4775 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4776 if (bMod == X86_MOD_MEM4)
4777 {
4778 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4779 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4780 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4781 }
4782 }
4783 }
4784 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4785 }
4786
4787#elif defined(RT_ARCH_ARM64)
4788 if (u32EffAddr == 0)
4789 {
4790 if (idxRegIndex == UINT8_MAX)
4791 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4792 else if (idxRegBase == UINT8_MAX)
4793 {
4794 if (cShiftIndex == 0)
4795 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4796 else
4797 {
4798 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4799 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4800 }
4801 }
4802 else
4803 {
4804 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4805 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4806 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4807 }
4808 }
4809 else
4810 {
4811 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4812 {
4813 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4814 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4815 }
4816 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4817 {
4818 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4819 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4820 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4821 }
4822 else
4823 {
4824 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4825 if (idxRegBase != UINT8_MAX)
4826 {
4827 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4828 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4829 }
4830 }
4831 if (idxRegIndex != UINT8_MAX)
4832 {
4833 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4834 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4835 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4836 }
4837 }
4838
4839#else
4840# error "port me"
4841#endif
4842
4843 if (idxRegIndex != UINT8_MAX)
4844 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4845 if (idxRegBase != UINT8_MAX)
4846 iemNativeRegFreeTmp(pReNative, idxRegBase);
4847 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4848 return off;
4849}
4850
4851
4852#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4853 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4854 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4855
4856#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4857 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4858 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4859
4860#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4861 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4862 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4863
4864/**
4865 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4866 *
4867 * @returns New off.
4868 * @param pReNative .
4869 * @param off .
4870 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4871 * bit 4 to REX.X. The two bits are part of the
4872 * REG sub-field, which isn't needed in this
4873 * function.
4874 * @param uSibAndRspOffset Two parts:
4875 * - The first 8 bits make up the SIB byte.
4876 * - The next 8 bits are the fixed RSP/ESP offset
4877 * in case of a pop [xSP].
4878 * @param u32Disp The displacement byte/word/dword, if any.
4879 * @param cbInstr The size of the fully decoded instruction. Used
4880 * for RIP relative addressing.
4881 * @param idxVarRet The result variable number.
4882 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4883 * when calculating the address.
4884 *
4885 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4886 */
4887DECL_INLINE_THROW(uint32_t)
4888iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4889 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4890{
4891 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4892
4893 /*
4894 * Special case the rip + disp32 form first.
4895 */
4896 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4897 {
4898#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4899 /* Need to take the current PC offset into account for the displacement, no need to flush here
4900 * as the PC is only accessed readonly and there is no branching or calling helpers involved. */
4901 u32Disp += pReNative->Core.offPc;
4902#endif
4903
4904 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4905 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4906 kIemNativeGstRegUse_ReadOnly);
4907#ifdef RT_ARCH_AMD64
4908 if (f64Bit)
4909 {
4910 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4911 if ((int32_t)offFinalDisp == offFinalDisp)
4912 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4913 else
4914 {
4915 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4916 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4917 }
4918 }
4919 else
4920 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4921
4922#elif defined(RT_ARCH_ARM64)
4923 if (f64Bit)
4924 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4925 (int64_t)(int32_t)u32Disp + cbInstr);
4926 else
4927 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4928 (int32_t)u32Disp + cbInstr);
4929
4930#else
4931# error "Port me!"
4932#endif
4933 iemNativeRegFreeTmp(pReNative, idxRegPc);
4934 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4935 return off;
4936 }
4937
4938 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
4939 int64_t i64EffAddr = 0;
4940 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4941 {
4942 case 0: break;
4943 case 1: i64EffAddr = (int8_t)u32Disp; break;
4944 case 2: i64EffAddr = (int32_t)u32Disp; break;
4945 default: AssertFailed();
4946 }
4947
4948 /* Get the register (or SIB) value. */
4949 uint8_t idxGstRegBase = UINT8_MAX;
4950 uint8_t idxGstRegIndex = UINT8_MAX;
4951 uint8_t cShiftIndex = 0;
4952 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4953 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4954 else /* SIB: */
4955 {
4956 /* index /w scaling . */
4957 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4958 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4959 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4960 if (idxGstRegIndex == 4)
4961 {
4962 /* no index */
4963 cShiftIndex = 0;
4964 idxGstRegIndex = UINT8_MAX;
4965 }
4966
4967 /* base */
4968 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4969 if (idxGstRegBase == 4)
4970 {
4971 /* pop [rsp] hack */
4972 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4973 }
4974 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4975 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4976 {
4977 /* mod=0 and base=5 -> disp32, no base reg. */
4978 Assert(i64EffAddr == 0);
4979 i64EffAddr = (int32_t)u32Disp;
4980 idxGstRegBase = UINT8_MAX;
4981 }
4982 }
4983
4984 /*
4985 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4986 * the start of the function.
4987 */
4988 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4989 {
4990 if (f64Bit)
4991 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4992 else
4993 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4994 return off;
4995 }
4996
4997 /*
4998 * Now emit code that calculates:
4999 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5000 * or if !f64Bit:
5001 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5002 */
5003 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5004 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5005 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5006 kIemNativeGstRegUse_ReadOnly);
5007 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5008 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5009 kIemNativeGstRegUse_ReadOnly);
5010
5011 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5012 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5013 {
5014 idxRegBase = idxRegIndex;
5015 idxRegIndex = UINT8_MAX;
5016 }
5017
5018#ifdef RT_ARCH_AMD64
5019 uint8_t bFinalAdj;
5020 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
5021 bFinalAdj = 0; /* likely */
5022 else
5023 {
5024 /* pop [rsp] with a problematic disp32 value. Split out the
5025 RSP offset and add it separately afterwards (bFinalAdj). */
5026 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
5027 Assert(idxGstRegBase == X86_GREG_xSP);
5028 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
5029 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
5030 Assert(bFinalAdj != 0);
5031 i64EffAddr -= bFinalAdj;
5032 Assert((int32_t)i64EffAddr == i64EffAddr);
5033 }
5034 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
5035//pReNative->pInstrBuf[off++] = 0xcc;
5036
5037 if (idxRegIndex == UINT8_MAX)
5038 {
5039 if (u32EffAddr == 0)
5040 {
5041 /* mov ret, base */
5042 if (f64Bit)
5043 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
5044 else
5045 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5046 }
5047 else
5048 {
5049 /* lea ret, [base + disp32] */
5050 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5051 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5052 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
5053 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5054 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5055 | (f64Bit ? X86_OP_REX_W : 0);
5056 pbCodeBuf[off++] = 0x8d;
5057 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5058 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5059 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5060 else
5061 {
5062 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5063 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5064 }
5065 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5066 if (bMod == X86_MOD_MEM4)
5067 {
5068 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5069 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5070 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5071 }
5072 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5073 }
5074 }
5075 else
5076 {
5077 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5078 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5079 if (idxRegBase == UINT8_MAX)
5080 {
5081 /* lea ret, [(index64 << cShiftIndex) + disp32] */
5082 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
5083 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5084 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5085 | (f64Bit ? X86_OP_REX_W : 0);
5086 pbCodeBuf[off++] = 0x8d;
5087 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5088 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5089 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5090 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5091 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5092 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5093 }
5094 else
5095 {
5096 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5097 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5098 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5099 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5100 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5101 | (f64Bit ? X86_OP_REX_W : 0);
5102 pbCodeBuf[off++] = 0x8d;
5103 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5104 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5105 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5106 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5107 if (bMod != X86_MOD_MEM0)
5108 {
5109 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5110 if (bMod == X86_MOD_MEM4)
5111 {
5112 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5113 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5114 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5115 }
5116 }
5117 }
5118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5119 }
5120
5121 if (!bFinalAdj)
5122 { /* likely */ }
5123 else
5124 {
5125 Assert(f64Bit);
5126 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
5127 }
5128
5129#elif defined(RT_ARCH_ARM64)
5130 if (i64EffAddr == 0)
5131 {
5132 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5133 if (idxRegIndex == UINT8_MAX)
5134 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
5135 else if (idxRegBase != UINT8_MAX)
5136 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5137 f64Bit, false /*fSetFlags*/, cShiftIndex);
5138 else
5139 {
5140 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
5141 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
5142 }
5143 }
5144 else
5145 {
5146 if (f64Bit)
5147 { /* likely */ }
5148 else
5149 i64EffAddr = (int32_t)i64EffAddr;
5150
5151 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
5152 {
5153 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5154 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
5155 }
5156 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
5157 {
5158 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5159 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5160 }
5161 else
5162 {
5163 if (f64Bit)
5164 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5165 else
5166 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5167 if (idxRegBase != UINT8_MAX)
5168 {
5169 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5170 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5171 }
5172 }
5173 if (idxRegIndex != UINT8_MAX)
5174 {
5175 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5176 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5177 f64Bit, false /*fSetFlags*/, cShiftIndex);
5178 }
5179 }
5180
5181#else
5182# error "port me"
5183#endif
5184
5185 if (idxRegIndex != UINT8_MAX)
5186 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5187 if (idxRegBase != UINT8_MAX)
5188 iemNativeRegFreeTmp(pReNative, idxRegBase);
5189 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5190 return off;
5191}
5192
5193
5194/*********************************************************************************************************************************
5195* Memory fetches and stores common *
5196*********************************************************************************************************************************/
5197
5198typedef enum IEMNATIVEMITMEMOP
5199{
5200 kIemNativeEmitMemOp_Store = 0,
5201 kIemNativeEmitMemOp_Fetch,
5202 kIemNativeEmitMemOp_Fetch_Zx_U16,
5203 kIemNativeEmitMemOp_Fetch_Zx_U32,
5204 kIemNativeEmitMemOp_Fetch_Zx_U64,
5205 kIemNativeEmitMemOp_Fetch_Sx_U16,
5206 kIemNativeEmitMemOp_Fetch_Sx_U32,
5207 kIemNativeEmitMemOp_Fetch_Sx_U64
5208} IEMNATIVEMITMEMOP;
5209
5210/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5211 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5212 * (with iSegReg = UINT8_MAX). */
5213DECL_INLINE_THROW(uint32_t)
5214iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5215 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5216 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5217{
5218 /*
5219 * Assert sanity.
5220 */
5221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5222 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5223 Assert( enmOp != kIemNativeEmitMemOp_Store
5224 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5225 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5226 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5227 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5228 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5229 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5230 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5231 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5232#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5233 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5234 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5235#else
5236 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5237#endif
5238 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5239#ifdef VBOX_STRICT
5240 if (iSegReg == UINT8_MAX)
5241 {
5242 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5243 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5244 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5245 switch (cbMem)
5246 {
5247 case 1:
5248 Assert( pfnFunction
5249 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5250 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5251 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5252 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5253 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5254 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5255 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5256 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5257 : UINT64_C(0xc000b000a0009000) ));
5258 break;
5259 case 2:
5260 Assert( pfnFunction
5261 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5262 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5263 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5264 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5265 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5266 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5267 : UINT64_C(0xc000b000a0009000) ));
5268 break;
5269 case 4:
5270 Assert( pfnFunction
5271 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5272 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5273 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5274 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5275 : UINT64_C(0xc000b000a0009000) ));
5276 break;
5277 case 8:
5278 Assert( pfnFunction
5279 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5280 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5281 : UINT64_C(0xc000b000a0009000) ));
5282 break;
5283#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5284 case sizeof(RTUINT128U):
5285 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5286 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5287 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5288 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5289 || ( enmOp == kIemNativeEmitMemOp_Store
5290 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5291 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5292 break;
5293 case sizeof(RTUINT256U):
5294 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5295 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5296 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5297 || ( enmOp == kIemNativeEmitMemOp_Store
5298 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5299 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5300 break;
5301#endif
5302 }
5303 }
5304 else
5305 {
5306 Assert(iSegReg < 6);
5307 switch (cbMem)
5308 {
5309 case 1:
5310 Assert( pfnFunction
5311 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5312 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5313 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5314 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5315 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5316 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5317 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5318 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5319 : UINT64_C(0xc000b000a0009000) ));
5320 break;
5321 case 2:
5322 Assert( pfnFunction
5323 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5324 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5325 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5326 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5327 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5328 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5329 : UINT64_C(0xc000b000a0009000) ));
5330 break;
5331 case 4:
5332 Assert( pfnFunction
5333 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5334 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5335 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5336 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5337 : UINT64_C(0xc000b000a0009000) ));
5338 break;
5339 case 8:
5340 Assert( pfnFunction
5341 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5342 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5343 : UINT64_C(0xc000b000a0009000) ));
5344 break;
5345#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5346 case sizeof(RTUINT128U):
5347 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5348 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5349 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5350 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5351 || ( enmOp == kIemNativeEmitMemOp_Store
5352 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5353 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5354 break;
5355 case sizeof(RTUINT256U):
5356 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5357 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5358 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5359 || ( enmOp == kIemNativeEmitMemOp_Store
5360 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5361 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5362 break;
5363#endif
5364 }
5365 }
5366#endif
5367
5368#ifdef VBOX_STRICT
5369 /*
5370 * Check that the fExec flags we've got make sense.
5371 */
5372 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5373#endif
5374
5375 /*
5376 * To keep things simple we have to commit any pending writes first as we
5377 * may end up making calls.
5378 */
5379 /** @todo we could postpone this till we make the call and reload the
5380 * registers after returning from the call. Not sure if that's sensible or
5381 * not, though. */
5382#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5383 off = iemNativeRegFlushPendingWrites(pReNative, off);
5384#else
5385 /* The program counter is treated differently for now. */
5386 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5387#endif
5388
5389#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5390 /*
5391 * Move/spill/flush stuff out of call-volatile registers.
5392 * This is the easy way out. We could contain this to the tlb-miss branch
5393 * by saving and restoring active stuff here.
5394 */
5395 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5396#endif
5397
5398 /*
5399 * Define labels and allocate the result register (trying for the return
5400 * register if we can).
5401 */
5402 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5403#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5404 uint8_t idxRegValueFetch = UINT8_MAX;
5405
5406 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5407 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5408 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5409 else
5410 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5411 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5412 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5413 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5414#else
5415 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5416 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5417 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5418 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5419#endif
5420 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
5421
5422#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5423 uint8_t idxRegValueStore = UINT8_MAX;
5424
5425 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5426 idxRegValueStore = !TlbState.fSkip
5427 && enmOp == kIemNativeEmitMemOp_Store
5428 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5429 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5430 : UINT8_MAX;
5431 else
5432 idxRegValueStore = !TlbState.fSkip
5433 && enmOp == kIemNativeEmitMemOp_Store
5434 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5435 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5436 : UINT8_MAX;
5437
5438#else
5439 uint8_t const idxRegValueStore = !TlbState.fSkip
5440 && enmOp == kIemNativeEmitMemOp_Store
5441 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5442 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5443 : UINT8_MAX;
5444#endif
5445 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5446 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5447 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5448 : UINT32_MAX;
5449
5450 /*
5451 * Jump to the TLB lookup code.
5452 */
5453 if (!TlbState.fSkip)
5454 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5455
5456 /*
5457 * TlbMiss:
5458 *
5459 * Call helper to do the fetching.
5460 * We flush all guest register shadow copies here.
5461 */
5462 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5463
5464#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5465 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5466#else
5467 RT_NOREF(idxInstr);
5468#endif
5469
5470#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5471 if (pReNative->Core.offPc)
5472 {
5473 /*
5474 * Update the program counter but restore it at the end of the TlbMiss branch.
5475 * This should allow delaying more program counter updates for the TlbLookup and hit paths
5476 * which are hopefully much more frequent, reducing the amount of memory accesses.
5477 */
5478 /* Allocate a temporary PC register. */
5479 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5480
5481 /* Perform the addition and store the result. */
5482 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5483 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5484
5485 /* Free and flush the PC register. */
5486 iemNativeRegFreeTmp(pReNative, idxPcReg);
5487 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5488 }
5489#endif
5490
5491#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5492 /* Save variables in volatile registers. */
5493 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5494 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5495 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5496 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5497#endif
5498
5499 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5500 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5501#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5502 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5503 {
5504 /*
5505 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5506 *
5507 * @note There was a register variable assigned to the variable for the TlbLookup case above
5508 * which must not be freed or the value loaded into the register will not be synced into the register
5509 * further down the road because the variable doesn't know it had a variable assigned.
5510 *
5511 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5512 * as it will be overwritten anyway.
5513 */
5514 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5515 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5516 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5517 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5518 }
5519 else
5520#endif
5521 if (enmOp == kIemNativeEmitMemOp_Store)
5522 {
5523 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5524 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5525#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5526 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5527#else
5528 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
5529 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5530#endif
5531 }
5532
5533 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5534 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5535#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5536 fVolGregMask);
5537#else
5538 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
5539#endif
5540
5541 if (iSegReg != UINT8_MAX)
5542 {
5543 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5544 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5545 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5546 }
5547
5548 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5549 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5550
5551 /* Done setting up parameters, make the call. */
5552 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5553
5554 /*
5555 * Put the result in the right register if this is a fetch.
5556 */
5557 if (enmOp != kIemNativeEmitMemOp_Store)
5558 {
5559#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5560 if ( cbMem == sizeof(RTUINT128U)
5561 || cbMem == sizeof(RTUINT256U))
5562 {
5563 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5564
5565 /* Sync the value on the stack with the host register assigned to the variable. */
5566 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5567 }
5568 else
5569#endif
5570 {
5571 Assert(idxRegValueFetch == pVarValue->idxReg);
5572 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5573 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5574 }
5575 }
5576
5577#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5578 /* Restore variables and guest shadow registers to volatile registers. */
5579 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5580 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5581#endif
5582
5583#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5584 if (pReNative->Core.offPc)
5585 {
5586 /*
5587 * Time to restore the program counter to its original value.
5588 */
5589 /* Allocate a temporary PC register. */
5590 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5591
5592 /* Restore the original value. */
5593 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5594 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5595
5596 /* Free and flush the PC register. */
5597 iemNativeRegFreeTmp(pReNative, idxPcReg);
5598 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5599 }
5600#endif
5601
5602#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5603 if (!TlbState.fSkip)
5604 {
5605 /* end of TlbMiss - Jump to the done label. */
5606 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5607 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5608
5609 /*
5610 * TlbLookup:
5611 */
5612 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5613 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5614 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5615
5616 /*
5617 * Emit code to do the actual storing / fetching.
5618 */
5619 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5620# ifdef VBOX_WITH_STATISTICS
5621 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5622 enmOp == kIemNativeEmitMemOp_Store
5623 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
5624 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
5625# endif
5626 switch (enmOp)
5627 {
5628 case kIemNativeEmitMemOp_Store:
5629 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5630 {
5631 switch (cbMem)
5632 {
5633 case 1:
5634 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5635 break;
5636 case 2:
5637 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5638 break;
5639 case 4:
5640 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5641 break;
5642 case 8:
5643 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5644 break;
5645#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5646 case sizeof(RTUINT128U):
5647 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5648 break;
5649 case sizeof(RTUINT256U):
5650 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5651 break;
5652#endif
5653 default:
5654 AssertFailed();
5655 }
5656 }
5657 else
5658 {
5659 switch (cbMem)
5660 {
5661 case 1:
5662 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5663 idxRegMemResult, TlbState.idxReg1);
5664 break;
5665 case 2:
5666 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5667 idxRegMemResult, TlbState.idxReg1);
5668 break;
5669 case 4:
5670 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5671 idxRegMemResult, TlbState.idxReg1);
5672 break;
5673 case 8:
5674 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5675 idxRegMemResult, TlbState.idxReg1);
5676 break;
5677 default:
5678 AssertFailed();
5679 }
5680 }
5681 break;
5682
5683 case kIemNativeEmitMemOp_Fetch:
5684 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5685 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5686 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5687 switch (cbMem)
5688 {
5689 case 1:
5690 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5691 break;
5692 case 2:
5693 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5694 break;
5695 case 4:
5696 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5697 break;
5698 case 8:
5699 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5700 break;
5701#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5702 case sizeof(RTUINT128U):
5703 /*
5704 * No need to sync back the register with the stack, this is done by the generic variable handling
5705 * code if there is a register assigned to a variable and the stack must be accessed.
5706 */
5707 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5708 break;
5709 case sizeof(RTUINT256U):
5710 /*
5711 * No need to sync back the register with the stack, this is done by the generic variable handling
5712 * code if there is a register assigned to a variable and the stack must be accessed.
5713 */
5714 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5715 break;
5716#endif
5717 default:
5718 AssertFailed();
5719 }
5720 break;
5721
5722 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5723 Assert(cbMem == 1);
5724 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5725 break;
5726
5727 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5728 Assert(cbMem == 1 || cbMem == 2);
5729 if (cbMem == 1)
5730 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5731 else
5732 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5733 break;
5734
5735 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5736 switch (cbMem)
5737 {
5738 case 1:
5739 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5740 break;
5741 case 2:
5742 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5743 break;
5744 case 4:
5745 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5746 break;
5747 default:
5748 AssertFailed();
5749 }
5750 break;
5751
5752 default:
5753 AssertFailed();
5754 }
5755
5756 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5757
5758 /*
5759 * TlbDone:
5760 */
5761 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5762
5763 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5764
5765# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5766 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5767 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5768# endif
5769 }
5770#else
5771 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5772#endif
5773
5774 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5775 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5776 return off;
5777}
5778
5779
5780
5781/*********************************************************************************************************************************
5782* Memory fetches (IEM_MEM_FETCH_XXX). *
5783*********************************************************************************************************************************/
5784
5785/* 8-bit segmented: */
5786#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5787 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5788 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5789 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5790
5791#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5792 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5793 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5794 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5795
5796#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5797 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5798 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5799 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5800
5801#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5802 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5803 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5804 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5805
5806#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5807 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5808 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5809 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5810
5811#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5812 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5813 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5814 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5815
5816#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5817 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5818 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5819 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5820
5821/* 16-bit segmented: */
5822#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5823 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5824 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5825 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5826
5827#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5828 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5829 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5830 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5831
5832#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5833 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5834 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5835 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5836
5837#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5838 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5839 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5840 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5841
5842#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5843 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5844 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5845 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5846
5847#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5848 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5849 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5850 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5851
5852
5853/* 32-bit segmented: */
5854#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5855 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5856 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5857 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5858
5859#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5860 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5861 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5862 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5863
5864#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5865 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5866 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5867 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5868
5869#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5870 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5871 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5872 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5873
5874#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
5875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
5876 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5877 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5878
5879#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
5880 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
5881 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5882 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5883
5884#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
5885 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
5886 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5887 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5888
5889AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5890#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5891 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5892 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5893 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5894
5895
5896/* 64-bit segmented: */
5897#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5898 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5899 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5900 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5901
5902AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5903#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5904 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5905 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5906 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5907
5908
5909/* 8-bit flat: */
5910#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5911 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5912 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5913 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5914
5915#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5917 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5918 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5919
5920#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5921 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5922 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5923 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5924
5925#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5927 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5928 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5929
5930#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5932 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5933 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5934
5935#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5936 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5937 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5938 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5939
5940#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5942 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5943 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5944
5945
5946/* 16-bit flat: */
5947#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5949 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5950 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5951
5952#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5953 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5954 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5955 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5956
5957#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5959 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5960 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5961
5962#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5963 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5964 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5965 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5966
5967#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5968 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5969 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5970 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5971
5972#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5973 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5974 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5975 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5976
5977/* 32-bit flat: */
5978#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5980 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5981 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5982
5983#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5984 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5985 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5986 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5987
5988#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5989 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5990 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5991 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5992
5993#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5994 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5995 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5996 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5997
5998#define IEM_MC_FETCH_MEM_FLAT_I16(a_i32Dst, a_GCPtrMem) \
5999 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
6000 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6001 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6002
6003#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
6004 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
6005 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6006 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6007
6008#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
6009 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
6010 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6011 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6012
6013#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
6014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
6015 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6016 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6017
6018
6019/* 64-bit flat: */
6020#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
6021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6022 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6023 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6024
6025#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
6026 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
6027 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6028 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
6029
6030#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6031/* 128-bit segmented: */
6032#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
6033 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6034 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6035 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
6036
6037#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
6038 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6039 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6040 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6041
6042AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
6043#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
6044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
6045 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6046 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
6047
6048#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
6049 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
6050 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6051 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
6052
6053/* 128-bit flat: */
6054#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
6055 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6056 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6057 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
6058
6059#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
6060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6061 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6062 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6063
6064#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
6065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
6066 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6067 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6068
6069#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
6070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6071 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6072 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
6073
6074/* 256-bit segmented: */
6075#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
6076 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6077 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6078 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6079
6080#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
6081 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6082 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6083 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6084
6085#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
6086 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6087 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6088 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6089
6090
6091/* 256-bit flat: */
6092#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
6093 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6094 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6095 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6096
6097#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
6098 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6099 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6100 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6101
6102#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
6103 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6104 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6105 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6106#endif
6107
6108
6109/*********************************************************************************************************************************
6110* Memory stores (IEM_MEM_STORE_XXX). *
6111*********************************************************************************************************************************/
6112
6113#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
6114 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
6115 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6116 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6117
6118#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
6119 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
6120 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6121 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6122
6123#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
6124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
6125 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6126 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6127
6128#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
6129 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
6130 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6131 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6132
6133
6134#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
6135 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
6136 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6137 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6138
6139#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
6140 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
6141 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6142 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6143
6144#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
6145 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
6146 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6147 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6148
6149#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
6150 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
6151 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6152 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6153
6154
6155#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
6156 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6157 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6158
6159#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
6160 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6161 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6162
6163#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
6164 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6165 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6166
6167#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
6168 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6169 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6170
6171
6172#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
6173 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6174 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6175
6176#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
6177 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6178 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6179
6180#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
6181 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6182 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6183
6184#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
6185 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6186 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6187
6188/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6189 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6190DECL_INLINE_THROW(uint32_t)
6191iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6192 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6193{
6194 /*
6195 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6196 * to do the grunt work.
6197 */
6198 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6199 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6200 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6201 pfnFunction, idxInstr);
6202 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6203 return off;
6204}
6205
6206
6207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6208# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6209 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6210 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6211 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6212
6213# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6214 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6215 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6216 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6217
6218# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6219 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6220 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6221 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6222
6223# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6224 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6225 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6226 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6227
6228
6229# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6230 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6231 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6232 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6233
6234# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6235 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6236 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6237 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6238
6239# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6240 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6241 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6242 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6243
6244# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6245 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6246 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6247 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6248#endif
6249
6250
6251
6252/*********************************************************************************************************************************
6253* Stack Accesses. *
6254*********************************************************************************************************************************/
6255/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
6256#define IEM_MC_PUSH_U16(a_u16Value) \
6257 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6258 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6259#define IEM_MC_PUSH_U32(a_u32Value) \
6260 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6261 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6262#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6263 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6264 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6265#define IEM_MC_PUSH_U64(a_u64Value) \
6266 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6267 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6268
6269#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6270 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6271 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6272#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6273 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6274 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6275#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6276 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6277 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6278
6279#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6280 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6281 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6282#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6283 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6284 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
6285
6286
6287DECL_FORCE_INLINE_THROW(uint32_t)
6288iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6289{
6290 /* Use16BitSp: */
6291#ifdef RT_ARCH_AMD64
6292 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6293 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6294#else
6295 /* sub regeff, regrsp, #cbMem */
6296 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6297 /* and regeff, regeff, #0xffff */
6298 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6299 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6300 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
6301 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6302#endif
6303 return off;
6304}
6305
6306
6307DECL_FORCE_INLINE(uint32_t)
6308iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6309{
6310 /* Use32BitSp: */
6311 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6312 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6313 return off;
6314}
6315
6316
6317/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6318DECL_INLINE_THROW(uint32_t)
6319iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6320 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6321{
6322 /*
6323 * Assert sanity.
6324 */
6325 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6326 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6327#ifdef VBOX_STRICT
6328 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6329 {
6330 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6331 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6332 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6333 Assert( pfnFunction
6334 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6335 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6336 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6337 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6338 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6339 : UINT64_C(0xc000b000a0009000) ));
6340 }
6341 else
6342 Assert( pfnFunction
6343 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6344 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6345 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6346 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6347 : UINT64_C(0xc000b000a0009000) ));
6348#endif
6349
6350#ifdef VBOX_STRICT
6351 /*
6352 * Check that the fExec flags we've got make sense.
6353 */
6354 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6355#endif
6356
6357 /*
6358 * To keep things simple we have to commit any pending writes first as we
6359 * may end up making calls.
6360 */
6361 /** @todo we could postpone this till we make the call and reload the
6362 * registers after returning from the call. Not sure if that's sensible or
6363 * not, though. */
6364 off = iemNativeRegFlushPendingWrites(pReNative, off);
6365
6366 /*
6367 * First we calculate the new RSP and the effective stack pointer value.
6368 * For 64-bit mode and flat 32-bit these two are the same.
6369 * (Code structure is very similar to that of PUSH)
6370 */
6371 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6372 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6373 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
6374 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6375 ? cbMem : sizeof(uint16_t);
6376 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6377 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6378 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6379 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6380 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6381 if (cBitsFlat != 0)
6382 {
6383 Assert(idxRegEffSp == idxRegRsp);
6384 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6385 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6386 if (cBitsFlat == 64)
6387 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6388 else
6389 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6390 }
6391 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6392 {
6393 Assert(idxRegEffSp != idxRegRsp);
6394 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6395 kIemNativeGstRegUse_ReadOnly);
6396#ifdef RT_ARCH_AMD64
6397 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6398#else
6399 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6400#endif
6401 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6402 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6403 offFixupJumpToUseOtherBitSp = off;
6404 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6405 {
6406 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6407 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6408 }
6409 else
6410 {
6411 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6412 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6413 }
6414 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6415 }
6416 /* SpUpdateEnd: */
6417 uint32_t const offLabelSpUpdateEnd = off;
6418
6419 /*
6420 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6421 * we're skipping lookup).
6422 */
6423 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6424 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6425 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6426 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6427 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6428 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6429 : UINT32_MAX;
6430 uint8_t const idxRegValue = !TlbState.fSkip
6431 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6432 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6433 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6434 : UINT8_MAX;
6435 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6436
6437
6438 if (!TlbState.fSkip)
6439 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6440 else
6441 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6442
6443 /*
6444 * Use16BitSp:
6445 */
6446 if (cBitsFlat == 0)
6447 {
6448#ifdef RT_ARCH_AMD64
6449 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6450#else
6451 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6452#endif
6453 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6454 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6455 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6456 else
6457 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6458 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6460 }
6461
6462 /*
6463 * TlbMiss:
6464 *
6465 * Call helper to do the pushing.
6466 */
6467 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6468
6469#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6470 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6471#else
6472 RT_NOREF(idxInstr);
6473#endif
6474
6475 /* Save variables in volatile registers. */
6476 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6477 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6478 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6479 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6480 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6481
6482 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6483 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6484 {
6485 /* Swap them using ARG0 as temp register: */
6486 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6487 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6488 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6489 }
6490 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6491 {
6492 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6493 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6494 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6495
6496 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6497 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6498 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6499 }
6500 else
6501 {
6502 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6503 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6504
6505 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6506 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6507 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6508 }
6509
6510 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6511 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6512
6513 /* Done setting up parameters, make the call. */
6514 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6515
6516 /* Restore variables and guest shadow registers to volatile registers. */
6517 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6518 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6519
6520#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6521 if (!TlbState.fSkip)
6522 {
6523 /* end of TlbMiss - Jump to the done label. */
6524 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6525 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6526
6527 /*
6528 * TlbLookup:
6529 */
6530 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6531 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6532
6533 /*
6534 * Emit code to do the actual storing / fetching.
6535 */
6536 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6537# ifdef VBOX_WITH_STATISTICS
6538 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6539 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6540# endif
6541 if (idxRegValue != UINT8_MAX)
6542 {
6543 switch (cbMemAccess)
6544 {
6545 case 2:
6546 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6547 break;
6548 case 4:
6549 if (!fIsIntelSeg)
6550 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6551 else
6552 {
6553 /* intel real mode segment push. 10890XE adds the 2nd of half EFLAGS to a
6554 PUSH FS in real mode, so we have to try emulate that here.
6555 We borrow the now unused idxReg1 from the TLB lookup code here. */
6556 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6557 kIemNativeGstReg_EFlags);
6558 if (idxRegEfl != UINT8_MAX)
6559 {
6560#ifdef ARCH_AMD64
6561 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6562 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6563 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6564#else
6565 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6566 off, TlbState.idxReg1, idxRegEfl,
6567 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6568#endif
6569 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6570 }
6571 else
6572 {
6573 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6574 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6575 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6576 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6577 }
6578 /* ASSUMES the upper half of idxRegValue is ZERO. */
6579 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6580 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6581 }
6582 break;
6583 case 8:
6584 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6585 break;
6586 default:
6587 AssertFailed();
6588 }
6589 }
6590 else
6591 {
6592 switch (cbMemAccess)
6593 {
6594 case 2:
6595 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6596 idxRegMemResult, TlbState.idxReg1);
6597 break;
6598 case 4:
6599 Assert(!fIsSegReg);
6600 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6601 idxRegMemResult, TlbState.idxReg1);
6602 break;
6603 case 8:
6604 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6605 break;
6606 default:
6607 AssertFailed();
6608 }
6609 }
6610
6611 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6612 TlbState.freeRegsAndReleaseVars(pReNative);
6613
6614 /*
6615 * TlbDone:
6616 *
6617 * Commit the new RSP value.
6618 */
6619 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6620 }
6621#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6622
6623#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6624 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6625#endif
6626 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6627 if (idxRegEffSp != idxRegRsp)
6628 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6629
6630 /* The value variable is implictly flushed. */
6631 if (idxRegValue != UINT8_MAX)
6632 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6633 iemNativeVarFreeLocal(pReNative, idxVarValue);
6634
6635 return off;
6636}
6637
6638
6639
6640/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
6641#define IEM_MC_POP_GREG_U16(a_iGReg) \
6642 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6643 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6644#define IEM_MC_POP_GREG_U32(a_iGReg) \
6645 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6646 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6647#define IEM_MC_POP_GREG_U64(a_iGReg) \
6648 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6649 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6650
6651#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6652 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6653 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6654#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6655 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6656 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6657
6658#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6659 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6660 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6661#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6662 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6663 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
6664
6665
6666DECL_FORCE_INLINE_THROW(uint32_t)
6667iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6668 uint8_t idxRegTmp)
6669{
6670 /* Use16BitSp: */
6671#ifdef RT_ARCH_AMD64
6672 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6673 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6674 RT_NOREF(idxRegTmp);
6675#else
6676 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6677 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6678 /* add tmp, regrsp, #cbMem */
6679 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6680 /* and tmp, tmp, #0xffff */
6681 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6682 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6683 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6684 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6685#endif
6686 return off;
6687}
6688
6689
6690DECL_FORCE_INLINE(uint32_t)
6691iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6692{
6693 /* Use32BitSp: */
6694 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6695 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6696 return off;
6697}
6698
6699
6700/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6701DECL_INLINE_THROW(uint32_t)
6702iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6703 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6704{
6705 /*
6706 * Assert sanity.
6707 */
6708 Assert(idxGReg < 16);
6709#ifdef VBOX_STRICT
6710 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6711 {
6712 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6713 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6714 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6715 Assert( pfnFunction
6716 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6717 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6718 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6719 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6720 : UINT64_C(0xc000b000a0009000) ));
6721 }
6722 else
6723 Assert( pfnFunction
6724 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6725 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6726 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6727 : UINT64_C(0xc000b000a0009000) ));
6728#endif
6729
6730#ifdef VBOX_STRICT
6731 /*
6732 * Check that the fExec flags we've got make sense.
6733 */
6734 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6735#endif
6736
6737 /*
6738 * To keep things simple we have to commit any pending writes first as we
6739 * may end up making calls.
6740 */
6741 off = iemNativeRegFlushPendingWrites(pReNative, off);
6742
6743 /*
6744 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6745 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6746 * directly as the effective stack pointer.
6747 * (Code structure is very similar to that of PUSH)
6748 */
6749 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6750 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6751 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6752 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6753 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6754 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6755 * will be the resulting register value. */
6756 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6757
6758 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6759 if (cBitsFlat != 0)
6760 {
6761 Assert(idxRegEffSp == idxRegRsp);
6762 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6763 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6764 }
6765 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6766 {
6767 Assert(idxRegEffSp != idxRegRsp);
6768 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6769 kIemNativeGstRegUse_ReadOnly);
6770#ifdef RT_ARCH_AMD64
6771 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6772#else
6773 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6774#endif
6775 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6776 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6777 offFixupJumpToUseOtherBitSp = off;
6778 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6779 {
6780/** @todo can skip idxRegRsp updating when popping ESP. */
6781 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6782 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6783 }
6784 else
6785 {
6786 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6787 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6788 }
6789 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6790 }
6791 /* SpUpdateEnd: */
6792 uint32_t const offLabelSpUpdateEnd = off;
6793
6794 /*
6795 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6796 * we're skipping lookup).
6797 */
6798 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6799 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6800 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6801 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6802 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6803 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6804 : UINT32_MAX;
6805
6806 if (!TlbState.fSkip)
6807 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6808 else
6809 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6810
6811 /*
6812 * Use16BitSp:
6813 */
6814 if (cBitsFlat == 0)
6815 {
6816#ifdef RT_ARCH_AMD64
6817 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6818#else
6819 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6820#endif
6821 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6822 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6823 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6824 else
6825 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6826 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6827 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6828 }
6829
6830 /*
6831 * TlbMiss:
6832 *
6833 * Call helper to do the pushing.
6834 */
6835 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6836
6837#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6838 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6839#else
6840 RT_NOREF(idxInstr);
6841#endif
6842
6843 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6844 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6845 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6846 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6847
6848
6849 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6850 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6851 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6852
6853 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6855
6856 /* Done setting up parameters, make the call. */
6857 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6858
6859 /* Move the return register content to idxRegMemResult. */
6860 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6861 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6862
6863 /* Restore variables and guest shadow registers to volatile registers. */
6864 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6865 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6866
6867#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6868 if (!TlbState.fSkip)
6869 {
6870 /* end of TlbMiss - Jump to the done label. */
6871 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6872 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6873
6874 /*
6875 * TlbLookup:
6876 */
6877 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6878 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6879
6880 /*
6881 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
6882 */
6883 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6884# ifdef VBOX_WITH_STATISTICS
6885 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6886 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6887# endif
6888 switch (cbMem)
6889 {
6890 case 2:
6891 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6892 break;
6893 case 4:
6894 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6895 break;
6896 case 8:
6897 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6898 break;
6899 default:
6900 AssertFailed();
6901 }
6902
6903 TlbState.freeRegsAndReleaseVars(pReNative);
6904
6905 /*
6906 * TlbDone:
6907 *
6908 * Set the new RSP value (FLAT accesses needs to calculate it first) and
6909 * commit the popped register value.
6910 */
6911 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6912 }
6913#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6914
6915 if (idxGReg != X86_GREG_xSP)
6916 {
6917 /* Set the register. */
6918 if (cbMem >= sizeof(uint32_t))
6919 {
6920#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6921 AssertMsg( pReNative->idxCurCall == 0
6922 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6923 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6924#endif
6925 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6926#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6927 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6928#endif
6929#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6930 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6931 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6932#endif
6933 }
6934 else
6935 {
6936 Assert(cbMem == sizeof(uint16_t));
6937 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6938 kIemNativeGstRegUse_ForUpdate);
6939 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6940#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6941 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6942#endif
6943 iemNativeRegFreeTmp(pReNative, idxRegDst);
6944 }
6945
6946 /* Complete RSP calculation for FLAT mode. */
6947 if (idxRegEffSp == idxRegRsp)
6948 {
6949 if (cBitsFlat == 64)
6950 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6951 else
6952 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6953 }
6954 }
6955 else
6956 {
6957 /* We're popping RSP, ESP or SP. Only the is a bit extra work, of course. */
6958 if (cbMem == sizeof(uint64_t))
6959 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6960 else if (cbMem == sizeof(uint32_t))
6961 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6962 else
6963 {
6964 if (idxRegEffSp == idxRegRsp)
6965 {
6966 if (cBitsFlat == 64)
6967 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6968 else
6969 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6970 }
6971 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6972 }
6973 }
6974
6975#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6976 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6977#endif
6978
6979 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6980 if (idxRegEffSp != idxRegRsp)
6981 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6982 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6983
6984 return off;
6985}
6986
6987
6988
6989/*********************************************************************************************************************************
6990* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6991*********************************************************************************************************************************/
6992
6993#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6994 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6995 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6996 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6997
6998#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6999 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7000 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7001 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
7002
7003#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7004 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7005 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7006 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
7007
7008#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7009 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7010 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7011 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
7012
7013
7014#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7015 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7016 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7017 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
7018
7019#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7020 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7021 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7022 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
7023
7024#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7025 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7026 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7027 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7028
7029#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7030 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7031 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7032 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
7033
7034#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7035 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
7036 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7037 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7038
7039
7040#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7042 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7043 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
7044
7045#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7046 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7047 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7048 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
7049
7050#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7051 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7052 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7053 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7054
7055#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7056 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7057 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7058 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
7059
7060#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7061 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
7062 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7063 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
7064
7065
7066#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7067 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7068 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7069 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
7070
7071#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7072 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7073 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7074 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
7075#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7076 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7077 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7078 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7079
7080#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7081 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7082 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7083 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
7084
7085#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7086 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
7087 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7088 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7089
7090
7091#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7092 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7093 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7094 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
7095
7096#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7097 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7098 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7099 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
7100
7101
7102#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7103 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7104 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7105 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
7106
7107#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7108 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7109 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7110 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
7111
7112#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7113 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7114 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7115 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
7116
7117#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7118 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7119 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7120 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
7121
7122
7123
7124#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7125 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7126 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
7127 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
7128
7129#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7130 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7131 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7132 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
7133
7134#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7135 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7136 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7137 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
7138
7139#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7140 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7141 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7142 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
7143
7144
7145#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7146 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7147 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7148 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
7149
7150#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7151 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7152 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7153 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
7154
7155#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7156 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7157 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7158 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7159
7160#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7161 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7162 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7163 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
7164
7165#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
7166 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
7167 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7168 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7169
7170
7171#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7172 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7173 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7174 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
7175
7176#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7177 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7178 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7179 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
7180
7181#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7182 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7183 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7184 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7185
7186#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7187 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7188 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7189 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7190
7191#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7192 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7193 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7194 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7195
7196
7197#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7198 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7199 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7200 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7201
7202#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7203 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7204 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7205 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7206
7207#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7208 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7209 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7210 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7211
7212#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7213 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7214 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7215 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7216
7217#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7218 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7219 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7220 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7221
7222
7223#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7224 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7225 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7226 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7227
7228#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7229 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7230 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7231 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7232
7233
7234#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7235 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7236 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7237 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7238
7239#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7240 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7241 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7242 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7243
7244#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7245 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7246 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7247 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7248
7249#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7250 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7251 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7252 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
7253
7254
7255DECL_INLINE_THROW(uint32_t)
7256iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7257 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7258 uintptr_t pfnFunction, uint8_t idxInstr)
7259{
7260 /*
7261 * Assert sanity.
7262 */
7263 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7264 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7265 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7266 && pVarMem->cbVar == sizeof(void *),
7267 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7268
7269 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7271 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7272 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7273 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7274
7275 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7276 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7277 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7278 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7279 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7280
7281 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7282
7283 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7284
7285#ifdef VBOX_STRICT
7286# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7287 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7288 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7289 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7290 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7291# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7292 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7293 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7294 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
7295
7296 if (iSegReg == UINT8_MAX)
7297 {
7298 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7299 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7300 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7301 switch (cbMem)
7302 {
7303 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7304 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7305 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7306 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7307 case 10:
7308 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7309 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7310 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7311 break;
7312 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7313# if 0
7314 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7315 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7316# endif
7317 default: AssertFailed(); break;
7318 }
7319 }
7320 else
7321 {
7322 Assert(iSegReg < 6);
7323 switch (cbMem)
7324 {
7325 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7326 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7327 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7328 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7329 case 10:
7330 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7331 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7332 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7333 break;
7334 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7335# if 0
7336 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7337 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7338# endif
7339 default: AssertFailed(); break;
7340 }
7341 }
7342# undef IEM_MAP_HLP_FN
7343# undef IEM_MAP_HLP_FN_NO_AT
7344#endif
7345
7346#ifdef VBOX_STRICT
7347 /*
7348 * Check that the fExec flags we've got make sense.
7349 */
7350 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7351#endif
7352
7353 /*
7354 * To keep things simple we have to commit any pending writes first as we
7355 * may end up making calls.
7356 */
7357 off = iemNativeRegFlushPendingWrites(pReNative, off);
7358
7359#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7360 /*
7361 * Move/spill/flush stuff out of call-volatile registers.
7362 * This is the easy way out. We could contain this to the tlb-miss branch
7363 * by saving and restoring active stuff here.
7364 */
7365 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7366 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7367#endif
7368
7369 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7370 while the tlb-miss codepath will temporarily put it on the stack.
7371 Set the the type to stack here so we don't need to do it twice below. */
7372 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7373 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7374 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7375 * lookup is done. */
7376
7377 /*
7378 * Define labels and allocate the result register (trying for the return
7379 * register if we can).
7380 */
7381 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7382 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7383 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7384 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
7385 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7386 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7387 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7388 : UINT32_MAX;
7389//off=iemNativeEmitBrk(pReNative, off, 0);
7390 /*
7391 * Jump to the TLB lookup code.
7392 */
7393 if (!TlbState.fSkip)
7394 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7395
7396 /*
7397 * TlbMiss:
7398 *
7399 * Call helper to do the fetching.
7400 * We flush all guest register shadow copies here.
7401 */
7402 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7403
7404#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7405 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7406#else
7407 RT_NOREF(idxInstr);
7408#endif
7409
7410#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7411 /* Save variables in volatile registers. */
7412 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7413 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7414#endif
7415
7416 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7417 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
7418#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7419 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7420#else
7421 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7422#endif
7423
7424 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7425 if (iSegReg != UINT8_MAX)
7426 {
7427 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7428 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7429 }
7430
7431 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
7432 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7433 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7434
7435 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7436 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7437
7438 /* Done setting up parameters, make the call. */
7439 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7440
7441 /*
7442 * Put the output in the right registers.
7443 */
7444 Assert(idxRegMemResult == pVarMem->idxReg);
7445 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7446 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7447
7448#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7449 /* Restore variables and guest shadow registers to volatile registers. */
7450 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7451 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7452#endif
7453
7454 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7455 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7456
7457#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7458 if (!TlbState.fSkip)
7459 {
7460 /* end of tlbsmiss - Jump to the done label. */
7461 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7462 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7463
7464 /*
7465 * TlbLookup:
7466 */
7467 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7468 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7469# ifdef VBOX_WITH_STATISTICS
7470 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7471 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7472# endif
7473
7474 /* [idxVarUnmapInfo] = 0; */
7475 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7476
7477 /*
7478 * TlbDone:
7479 */
7480 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7481
7482 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7483
7484# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7485 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7486 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7487# endif
7488 }
7489#else
7490 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7491#endif
7492
7493 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7494 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7495
7496 return off;
7497}
7498
7499
7500#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7501 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7502 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7503
7504#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7505 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7506 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7507
7508#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7509 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7510 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7511
7512#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7513 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7514 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7515
7516DECL_INLINE_THROW(uint32_t)
7517iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7518 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7519{
7520 /*
7521 * Assert sanity.
7522 */
7523 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7524#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7525 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7526#endif
7527 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7528 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7529 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7530#ifdef VBOX_STRICT
7531 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7532 {
7533 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7534 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7535 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7536 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7537 case IEM_ACCESS_TYPE_WRITE:
7538 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7539 case IEM_ACCESS_TYPE_READ:
7540 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7541 default: AssertFailed();
7542 }
7543#else
7544 RT_NOREF(fAccess);
7545#endif
7546
7547 /*
7548 * To keep things simple we have to commit any pending writes first as we
7549 * may end up making calls (there shouldn't be any at this point, so this
7550 * is just for consistency).
7551 */
7552 /** @todo we could postpone this till we make the call and reload the
7553 * registers after returning from the call. Not sure if that's sensible or
7554 * not, though. */
7555 off = iemNativeRegFlushPendingWrites(pReNative, off);
7556
7557 /*
7558 * Move/spill/flush stuff out of call-volatile registers.
7559 *
7560 * We exclude any register holding the bUnmapInfo variable, as we'll be
7561 * checking it after returning from the call and will free it afterwards.
7562 */
7563 /** @todo save+restore active registers and maybe guest shadows in miss
7564 * scenario. */
7565 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7566 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7567
7568 /*
7569 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7570 * to call the unmap helper function.
7571 *
7572 * The likelyhood of it being zero is higher than for the TLB hit when doing
7573 * the mapping, as a TLB miss for an well aligned and unproblematic memory
7574 * access should also end up with a mapping that won't need special unmapping.
7575 */
7576 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7577 * should speed up things for the pure interpreter as well when TLBs
7578 * are enabled. */
7579#ifdef RT_ARCH_AMD64
7580 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7581 {
7582 /* test byte [rbp - xxx], 0ffh */
7583 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7584 pbCodeBuf[off++] = 0xf6;
7585 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7586 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7587 pbCodeBuf[off++] = 0xff;
7588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7589 }
7590 else
7591#endif
7592 {
7593 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7594 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7595 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7596 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7597 }
7598 uint32_t const offJmpFixup = off;
7599 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
7600
7601 /*
7602 * Call the unmap helper function.
7603 */
7604#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7605 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7606#else
7607 RT_NOREF(idxInstr);
7608#endif
7609
7610 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7611 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7612 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7613
7614 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7615 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7616
7617 /* Done setting up parameters, make the call. */
7618 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7619
7620 /* The bUnmapInfo variable is implictly free by these MCs. */
7621 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7622
7623 /*
7624 * Done, just fixup the jump for the non-call case.
7625 */
7626 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7627
7628 return off;
7629}
7630
7631
7632
7633/*********************************************************************************************************************************
7634* State and Exceptions *
7635*********************************************************************************************************************************/
7636
7637#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7638#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7639
7640#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7641#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7642#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7643
7644#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7645#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7646#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7647
7648
7649DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7650{
7651 /** @todo this needs a lot more work later. */
7652 RT_NOREF(pReNative, fForChange);
7653 return off;
7654}
7655
7656
7657
7658/*********************************************************************************************************************************
7659* Emitters for FPU related operations. *
7660*********************************************************************************************************************************/
7661
7662#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7663 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7664
7665/** Emits code for IEM_MC_FETCH_FCW. */
7666DECL_INLINE_THROW(uint32_t)
7667iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7668{
7669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7670 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7671
7672 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7673
7674 /* Allocate a temporary FCW register. */
7675 /** @todo eliminate extra register */
7676 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7677 kIemNativeGstRegUse_ReadOnly);
7678
7679 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7680
7681 /* Free but don't flush the FCW register. */
7682 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7683 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7684
7685 return off;
7686}
7687
7688
7689#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7690 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7691
7692/** Emits code for IEM_MC_FETCH_FSW. */
7693DECL_INLINE_THROW(uint32_t)
7694iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7695{
7696 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7697 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7698
7699 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7700 /* Allocate a temporary FSW register. */
7701 /** @todo eliminate extra register */
7702 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7703 kIemNativeGstRegUse_ReadOnly);
7704
7705 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7706
7707 /* Free but don't flush the FSW register. */
7708 iemNativeRegFreeTmp(pReNative, idxFswReg);
7709 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7710
7711 return off;
7712}
7713
7714
7715
7716#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7717
7718
7719/*********************************************************************************************************************************
7720* Emitters for SSE/AVX specific operations. *
7721*********************************************************************************************************************************/
7722
7723#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7724 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7725
7726/** Emits code for IEM_MC_COPY_XREG_U128. */
7727DECL_INLINE_THROW(uint32_t)
7728iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7729{
7730 /* This is a nop if the source and destination register are the same. */
7731 if (iXRegDst != iXRegSrc)
7732 {
7733 /* Allocate destination and source register. */
7734 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7735 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7736 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7737 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7738
7739 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7740
7741 /* Free but don't flush the source and destination register. */
7742 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7743 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7744 }
7745
7746 return off;
7747}
7748
7749
7750#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7751 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7752
7753/** Emits code for IEM_MC_FETCH_XREG_U128. */
7754DECL_INLINE_THROW(uint32_t)
7755iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7756{
7757 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7758 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7759
7760 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7761 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7762
7763 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7764
7765 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7766
7767 /* Free but don't flush the source register. */
7768 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7769 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7770
7771 return off;
7772}
7773
7774
7775#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7776 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7777
7778#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
7779 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
7780
7781/** Emits code for IEM_MC_FETCH_XREG_U64. */
7782DECL_INLINE_THROW(uint32_t)
7783iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7784{
7785 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7786 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7787
7788 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7789 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7790
7791 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7792 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7793
7794 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7795
7796 /* Free but don't flush the source register. */
7797 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7798 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7799
7800 return off;
7801}
7802
7803
7804#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7805 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7806
7807#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
7808 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
7809
7810/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
7811DECL_INLINE_THROW(uint32_t)
7812iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7813{
7814 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7815 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7816
7817 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7818 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7819
7820 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7821 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7822
7823 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7824
7825 /* Free but don't flush the source register. */
7826 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7827 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7828
7829 return off;
7830}
7831
7832
7833#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
7834 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
7835
7836/** Emits code for IEM_MC_FETCH_XREG_U16. */
7837DECL_INLINE_THROW(uint32_t)
7838iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7839{
7840 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7841 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7842
7843 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7844 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7845
7846 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7847 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7848
7849 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7850
7851 /* Free but don't flush the source register. */
7852 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7853 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7854
7855 return off;
7856}
7857
7858
7859#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
7860 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
7861
7862/** Emits code for IEM_MC_FETCH_XREG_U8. */
7863DECL_INLINE_THROW(uint32_t)
7864iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7865{
7866 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7867 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7868
7869 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7870 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7871
7872 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7873 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7874
7875 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7876
7877 /* Free but don't flush the source register. */
7878 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7879 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7880
7881 return off;
7882}
7883
7884
7885#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7886 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7887
7888AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7889#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7890 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7891
7892
7893/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7894DECL_INLINE_THROW(uint32_t)
7895iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7896{
7897 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7898 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7899
7900 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7901 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7902 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7903
7904 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7905
7906 /* Free but don't flush the source register. */
7907 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7908 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7909
7910 return off;
7911}
7912
7913
7914#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7915 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
7916
7917#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7918 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
7919
7920#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
7921 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
7922
7923#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
7924 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
7925
7926#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
7927 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
7928
7929#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
7930 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
7931
7932/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8. */
7933DECL_INLINE_THROW(uint32_t)
7934iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
7935 uint8_t cbLocal, uint8_t iElem)
7936{
7937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7938 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
7939
7940#ifdef VBOX_STRICT
7941 switch (cbLocal)
7942 {
7943 case sizeof(uint64_t): Assert(iElem < 2); break;
7944 case sizeof(uint32_t): Assert(iElem < 4); break;
7945 case sizeof(uint16_t): Assert(iElem < 8); break;
7946 case sizeof(uint8_t): Assert(iElem < 16); break;
7947 default: AssertFailed();
7948 }
7949#endif
7950
7951 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7952 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7953 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7954
7955 switch (cbLocal)
7956 {
7957 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7958 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7959 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7960 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7961 default: AssertFailed();
7962 }
7963
7964 /* Free but don't flush the source register. */
7965 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7966 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7967
7968 return off;
7969}
7970
7971
7972#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7973 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7974
7975/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7976DECL_INLINE_THROW(uint32_t)
7977iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7978{
7979 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7980 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7981
7982 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7983 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7984 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7985
7986 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
7987 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7988 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7989
7990 /* Free but don't flush the source register. */
7991 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7992 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7993
7994 return off;
7995}
7996
7997
7998#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7999 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
8000
8001/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
8002DECL_INLINE_THROW(uint32_t)
8003iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
8004{
8005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8006 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8007
8008 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8009 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8010 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8011
8012 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
8013 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
8014 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
8015
8016 /* Free but don't flush the source register. */
8017 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8018 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8019
8020 return off;
8021}
8022
8023
8024#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
8025 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
8026
8027/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
8028DECL_INLINE_THROW(uint32_t)
8029iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
8030 uint8_t idxSrcVar, uint8_t iDwSrc)
8031{
8032 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8033 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8034
8035 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8036 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8037 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8038
8039 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
8040 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
8041
8042 /* Free but don't flush the destination register. */
8043 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8044 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8045
8046 return off;
8047}
8048
8049
8050#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8051 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8052
8053/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
8054DECL_INLINE_THROW(uint32_t)
8055iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8056{
8057 /*
8058 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
8059 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
8060 * allocated host register for iYRegDst containing garbage. This will be catched by the guest register value checking in debug builds.
8061 */
8062 if (iYRegDst != iYRegSrc)
8063 {
8064 /* Allocate destination and source register. */
8065 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8066 kIemNativeGstSimdRegLdStSz_256,
8067 kIemNativeGstRegUse_ForFullWrite);
8068 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8069 kIemNativeGstSimdRegLdStSz_Low128,
8070 kIemNativeGstRegUse_ReadOnly);
8071
8072 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8073 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8074
8075 /* Free but don't flush the source and destination register. */
8076 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8077 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8078 }
8079 else
8080 {
8081 /* This effectively only clears the upper 128-bits of the register. */
8082 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8083 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8084
8085 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8086
8087 /* Free but don't flush the destination register. */
8088 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8089 }
8090
8091 return off;
8092}
8093
8094
8095#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8096 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8097
8098/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
8099DECL_INLINE_THROW(uint32_t)
8100iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8101{
8102 /*
8103 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
8104 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
8105 * allocated host register for iYRegDst containing garbage. This will be catched by the guest register value checking in debug builds.
8106 * iYRegSrc == iYRegDst would effectively only clear any upper 256-bits for a zmm register we don't support yet, so this is just a nop.
8107 */
8108 if (iYRegDst != iYRegSrc)
8109 {
8110 /* Allocate destination and source register. */
8111 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8112 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8113 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8114 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8115
8116 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8117
8118 /* Free but don't flush the source and destination register. */
8119 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8120 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8121 }
8122
8123 return off;
8124}
8125
8126
8127#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
8128 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
8129
8130/** Emits code for IEM_MC_FETCH_YREG_U128. */
8131DECL_INLINE_THROW(uint32_t)
8132iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
8133{
8134 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8135 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8136
8137 Assert(iDQWord <= 1);
8138 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8139 iDQWord == 1
8140 ? kIemNativeGstSimdRegLdStSz_High128
8141 : kIemNativeGstSimdRegLdStSz_Low128,
8142 kIemNativeGstRegUse_ReadOnly);
8143
8144 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8145 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8146
8147 if (iDQWord == 1)
8148 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8149 else
8150 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8151
8152 /* Free but don't flush the source register. */
8153 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8154 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8155
8156 return off;
8157}
8158
8159
8160#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
8161 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
8162
8163/** Emits code for IEM_MC_FETCH_YREG_U64. */
8164DECL_INLINE_THROW(uint32_t)
8165iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
8166{
8167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8168 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8169
8170 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8171 iQWord >= 2
8172 ? kIemNativeGstSimdRegLdStSz_High128
8173 : kIemNativeGstSimdRegLdStSz_Low128,
8174 kIemNativeGstRegUse_ReadOnly);
8175
8176 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8177 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8178
8179 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8180
8181 /* Free but don't flush the source register. */
8182 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8183 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8184
8185 return off;
8186}
8187
8188
8189#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
8190 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
8191
8192/** Emits code for IEM_MC_FETCH_YREG_U32. */
8193DECL_INLINE_THROW(uint32_t)
8194iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
8195{
8196 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8197 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8198
8199 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8200 iDWord >= 4
8201 ? kIemNativeGstSimdRegLdStSz_High128
8202 : kIemNativeGstSimdRegLdStSz_Low128,
8203 kIemNativeGstRegUse_ReadOnly);
8204
8205 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8206 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8207
8208 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8209
8210 /* Free but don't flush the source register. */
8211 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8212 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8213
8214 return off;
8215}
8216
8217
8218#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8219 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8220
8221/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8222DECL_INLINE_THROW(uint32_t)
8223iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8224{
8225 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8226 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8227
8228 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8229
8230 /* Free but don't flush the register. */
8231 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8232
8233 return off;
8234}
8235
8236
8237#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8238 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8239
8240/** Emits code for IEM_MC_STORE_YREG_U128. */
8241DECL_INLINE_THROW(uint32_t)
8242iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8243{
8244 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8245 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8246
8247 Assert(iDQword <= 1);
8248 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8249 iDQword == 0
8250 ? kIemNativeGstSimdRegLdStSz_Low128
8251 : kIemNativeGstSimdRegLdStSz_High128,
8252 kIemNativeGstRegUse_ForFullWrite);
8253
8254 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8255
8256 if (iDQword == 0)
8257 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8258 else
8259 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8260
8261 /* Free but don't flush the source register. */
8262 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8263 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8264
8265 return off;
8266}
8267
8268
8269#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8270 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8271
8272/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8273DECL_INLINE_THROW(uint32_t)
8274iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8275{
8276 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8277 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8278
8279 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8280 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8281
8282 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8283
8284 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8285 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8286
8287 /* Free but don't flush the source register. */
8288 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8289 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8290
8291 return off;
8292}
8293
8294
8295#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8296 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8297
8298/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8299DECL_INLINE_THROW(uint32_t)
8300iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8301{
8302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8303 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8304
8305 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8306 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8307
8308 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8309
8310 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8311 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8312
8313 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8314 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8315
8316 return off;
8317}
8318
8319
8320#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8321 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8322
8323/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8324DECL_INLINE_THROW(uint32_t)
8325iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8326{
8327 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8328 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8329
8330 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8331 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8332
8333 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8334
8335 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8336 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8337
8338 /* Free but don't flush the source register. */
8339 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8340 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8341
8342 return off;
8343}
8344
8345
8346#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8347 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8348
8349/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8350DECL_INLINE_THROW(uint32_t)
8351iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8352{
8353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8354 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8355
8356 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8357 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8358
8359 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8360
8361 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8362 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8363
8364 /* Free but don't flush the source register. */
8365 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8366 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8367
8368 return off;
8369}
8370
8371
8372#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8373 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8374
8375/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8376DECL_INLINE_THROW(uint32_t)
8377iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8378{
8379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8380 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8381
8382 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8383 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8384
8385 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8386
8387 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8388 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8389
8390 /* Free but don't flush the source register. */
8391 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8392 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8393
8394 return off;
8395}
8396
8397
8398#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8399 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8400
8401/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8402DECL_INLINE_THROW(uint32_t)
8403iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8404{
8405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8406 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8407
8408 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8409 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8410
8411 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8412
8413 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8414
8415 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8416 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8417
8418 return off;
8419}
8420
8421
8422#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8423 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8424
8425/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8426DECL_INLINE_THROW(uint32_t)
8427iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8428{
8429 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8430 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8431
8432 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8433 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8434
8435 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8436
8437 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8438
8439 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8440 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8441
8442 return off;
8443}
8444
8445
8446#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8447 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8448
8449/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8450DECL_INLINE_THROW(uint32_t)
8451iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8452{
8453 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8454 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8455
8456 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8457 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8458
8459 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8460
8461 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8462
8463 /* Free but don't flush the source register. */
8464 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8465 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8466
8467 return off;
8468}
8469
8470
8471#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8472 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8473
8474/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8475DECL_INLINE_THROW(uint32_t)
8476iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8477{
8478 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8479 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8480
8481 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8482 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8483
8484 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8485
8486 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8487
8488 /* Free but don't flush the source register. */
8489 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8490 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8491
8492 return off;
8493}
8494
8495
8496#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8497 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8498
8499/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8500DECL_INLINE_THROW(uint32_t)
8501iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8502{
8503 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8504 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8505
8506 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8507 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8508
8509 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8510
8511 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8512
8513 /* Free but don't flush the source register. */
8514 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8515 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8516
8517 return off;
8518}
8519
8520
8521#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8522 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8523
8524/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8525DECL_INLINE_THROW(uint32_t)
8526iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8527{
8528 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8529 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8530
8531 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8532 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8533
8534 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8535
8536 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8537 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8538
8539 /* Free but don't flush the source register. */
8540 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8541 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8542
8543 return off;
8544}
8545
8546
8547#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8548 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8549
8550/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8551DECL_INLINE_THROW(uint32_t)
8552iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8553{
8554 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8555 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8556
8557 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8558 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8559
8560 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8561
8562 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8563 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8564
8565 /* Free but don't flush the source register. */
8566 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8567 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8568
8569 return off;
8570}
8571
8572
8573#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8574 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8575
8576/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8577DECL_INLINE_THROW(uint32_t)
8578iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8579{
8580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8581 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8582
8583 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8584 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8585 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8586 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8587 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8588
8589 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8590 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8591 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8592
8593 /* Free but don't flush the source and destination registers. */
8594 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8595 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8596 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8597
8598 return off;
8599}
8600
8601
8602#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8603 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8604
8605/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8606DECL_INLINE_THROW(uint32_t)
8607iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8608{
8609 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8610 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8611
8612 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8613 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8614 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8615 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8616 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8617
8618 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8619 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8620 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8621
8622 /* Free but don't flush the source and destination registers. */
8623 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8624 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8625 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8626
8627 return off;
8628}
8629
8630
8631#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
8632 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
8633
8634
8635/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
8636DECL_INLINE_THROW(uint32_t)
8637iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
8638{
8639 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8640 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8641
8642 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
8643 if (bImm8Mask & RT_BIT(0))
8644 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
8645 if (bImm8Mask & RT_BIT(1))
8646 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
8647 if (bImm8Mask & RT_BIT(2))
8648 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
8649 if (bImm8Mask & RT_BIT(3))
8650 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
8651
8652 /* Free but don't flush the destination register. */
8653 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8654
8655 return off;
8656}
8657
8658
8659#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
8660 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
8661
8662
8663/** Emits code for IEM_MC_FETCH_YREG_U256. */
8664DECL_INLINE_THROW(uint32_t)
8665iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
8666{
8667 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8668 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
8669
8670 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8671 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8672 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8673
8674 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
8675
8676 /* Free but don't flush the source register. */
8677 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8678 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8679
8680 return off;
8681}
8682
8683
8684#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
8685 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
8686
8687
8688/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
8689DECL_INLINE_THROW(uint32_t)
8690iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
8691{
8692 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8693 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8694
8695 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8696 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8697 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
8698
8699 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
8700
8701 /* Free but don't flush the source register. */
8702 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8703 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8704
8705 return off;
8706}
8707
8708
8709#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
8710 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
8711
8712
8713/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
8714DECL_INLINE_THROW(uint32_t)
8715iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
8716 uint8_t idxSrcVar, uint8_t iDwSrc)
8717{
8718 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8719 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8720
8721 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8722 iDwDst < 4
8723 ? kIemNativeGstSimdRegLdStSz_Low128
8724 : kIemNativeGstSimdRegLdStSz_High128,
8725 kIemNativeGstRegUse_ForUpdate);
8726 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
8727 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8728
8729 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
8730 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
8731
8732 /* Free but don't flush the source register. */
8733 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8734 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8735 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8736
8737 return off;
8738}
8739
8740
8741#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
8742 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
8743
8744
8745/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
8746DECL_INLINE_THROW(uint32_t)
8747iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
8748 uint8_t idxSrcVar, uint8_t iQwSrc)
8749{
8750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8751 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8752
8753 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8754 iQwDst < 2
8755 ? kIemNativeGstSimdRegLdStSz_Low128
8756 : kIemNativeGstSimdRegLdStSz_High128,
8757 kIemNativeGstRegUse_ForUpdate);
8758 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
8759 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8760
8761 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
8762 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
8763
8764 /* Free but don't flush the source register. */
8765 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8766 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8767 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8768
8769 return off;
8770}
8771
8772
8773#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
8774 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
8775
8776
8777/** Emits code for IEM_MC_STORE_YREG_U64. */
8778DECL_INLINE_THROW(uint32_t)
8779iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
8780{
8781 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8782 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8783
8784 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8785 iQwDst < 2
8786 ? kIemNativeGstSimdRegLdStSz_Low128
8787 : kIemNativeGstSimdRegLdStSz_High128,
8788 kIemNativeGstRegUse_ForUpdate);
8789
8790 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8791
8792 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
8793
8794 /* Free but don't flush the source register. */
8795 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8796 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8797
8798 return off;
8799}
8800
8801
8802#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
8803 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
8804
8805/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
8806DECL_INLINE_THROW(uint32_t)
8807iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8808{
8809 RT_NOREF(pReNative, iYReg);
8810 /** @todo Needs to be implemented when support for AVX-512 is added. */
8811 return off;
8812}
8813
8814
8815
8816/*********************************************************************************************************************************
8817* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
8818*********************************************************************************************************************************/
8819
8820/**
8821 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
8822 */
8823DECL_INLINE_THROW(uint32_t)
8824iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8825{
8826 /* Grab the MXCSR register, it must not be call volatile or we end up freeing it when setting up the call below. */
8827 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
8828 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8829 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8830
8831 /*
8832 * Need to do the FPU preparation.
8833 */
8834 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8835
8836 /*
8837 * Do all the call setup and cleanup.
8838 */
8839 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
8840
8841 /*
8842 * Load the MXCSR register into the first argument and mask out the current exception flags.
8843 */
8844 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
8845 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
8846
8847 /*
8848 * Make the call.
8849 */
8850 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8851
8852 /*
8853 * The updated MXCSR is in the return register.
8854 */
8855 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
8856
8857#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8858 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8859 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8860#endif
8861 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8862
8863 return off;
8864}
8865
8866
8867#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
8868 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8869
8870/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
8871DECL_INLINE_THROW(uint32_t)
8872iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8873{
8874 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8875 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8876 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8877}
8878
8879
8880#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8881 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8882
8883/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
8884DECL_INLINE_THROW(uint32_t)
8885iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8886{
8887 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8888 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8889 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8890 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8891}
8892
8893
8894/*********************************************************************************************************************************
8895* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
8896*********************************************************************************************************************************/
8897
8898#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
8899 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8900
8901/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
8902DECL_INLINE_THROW(uint32_t)
8903iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8904{
8905 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8906 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8907 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8908}
8909
8910
8911#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8912 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8913
8914/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
8915DECL_INLINE_THROW(uint32_t)
8916iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8917{
8918 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8919 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8920 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8921 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8922}
8923#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8924
8925
8926/*********************************************************************************************************************************
8927* Include instruction emitters. *
8928*********************************************************************************************************************************/
8929#include "target-x86/IEMAllN8veEmit-x86.h"
8930
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette