VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104133

Last change on this file since 104133 was 104133, checked in by vboxsync, 13 months ago

VMM/IEM: Convert the 256-bit vmovsldup/vmovshdup/vmovddup emulations to microcode, bugref:10641

1/* $Id: IEMAllN8veRecompFuncs.h 104133 2024-04-03 12:03:22Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down the configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If it ever becomes possible to reference the PC register at some point, the writeback needs to be done here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
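/*
 * Rough illustration of how an IEM_MC_BEGIN_EX/IEM_MC_END pair expands inside
 * a generated native emitter.  The function name and signature below are a
 * simplified assumption for illustration, not the exact generated prototype:
 *
 *      static uint32_t iemNativeRecompFunc_Example(PIEMRECOMPILERSTATE pReNative, uint32_t off,
 *                                                  PCIEMTHRDEDCALLENTRY pCallEntry)
 *      {
 *          { // IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgs)
 *              Assert(pReNative->Core.bmVars == 0);
 *              pReNative->fMc     = a_fMcFlags;
 *              pReNative->fCImpl  = a_fCImplFlags;
 *              pReNative->cArgsX  = a_cArgs;
 *              // ... the IEM_MC_XXX statements emit native code here, each advancing 'off' ...
 *          } return off; // IEM_MC_END() frees all variables and returns the new buffer offset.
 *      }
 */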
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
208
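/*
 * Usage sketch for the IEM_MC_NATIVE_EMIT_N wrappers above: the named emitter
 * takes (pReNative, off, ...) and returns the updated code buffer offset.  The
 * emitter name and its arguments below are hypothetical:
 *
 *      static uint32_t iemNativeEmit_ExampleAddGprGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off,
 *                                                     uint8_t idxVarDst, uint8_t idxVarSrc)
 *      {
 *          // ... emit host instructions here, advancing 'off' ...
 *          return off;
 *      }
 *
 *      // In an instruction body, guarded by IEM_MC_NATIVE_IF(fHostArchs):
 *      //     IEM_MC_NATIVE_EMIT_2(iemNativeEmit_ExampleAddGprGpr, idxVarDst, idxVarSrc);
 *      // which expands to:
 *      //     off = iemNativeEmit_ExampleAddGprGpr(pReNative, off, (idxVarDst), (idxVarSrc));
 */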
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
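/*
 * Rough illustration of what a deferral like IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED
 * expands to inside a generated emitter (the CImpl worker name and arguments are
 * hypothetical):
 *
 *      pReNative->fMc    = 0;
 *      pReNative->fCImpl = fFlags;
 *      return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, fGstShwFlush,
 *                                     (uintptr_t)iemCImpl_ExampleWorker, cbInstr, uArg0, uArg1);
 *
 * iemNativeEmitCImplCall2 in turn forwards to iemNativeEmitCImplCall() with
 * cArgs=2 and zero padding for the unused third argument.
 */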
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If anything other than X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW is set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it will probably drop into the
310 * debugger, so it is not worth the effort of trying to service it here and
311 * we just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * the immediate in the AND operation), we always update the flags and skip
315 * the conditional jump associated with the extra check.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
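/*
 * For reference, the check emitted above corresponds roughly to the following
 * guest state logic (a sketch of the iemRegFinishClearingRF semantics, not the
 * exact interpreter code; the EFLAGS accessor is an approximation):
 *
 *      uint32_t const fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          return via the ReturnWithFlags label (the execution loop deals with it);
 *      // always clear RF and the interrupt shadow, skipping the "anything set?" pre-check:
 *      pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 */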
346
347/** The VINF_SUCCESS dummy. */
348template<int const a_rcNormal>
349DECL_FORCE_INLINE(uint32_t)
350iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
351{
352 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
353 if (a_rcNormal != VINF_SUCCESS)
354 {
355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
357#else
358 RT_NOREF_PV(idxInstr);
359#endif
360
361 /* As this code returns from the TB any pending register writes must be flushed. */
362 off = iemNativeRegFlushPendingWrites(pReNative, off);
363
364 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
365 }
366 return off;
367}
368
369
370#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
371 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
373
374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
375 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
377 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
378
379/** Same as iemRegAddToRip64AndFinishingNoFlags. */
380DECL_INLINE_THROW(uint32_t)
381iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
382{
383#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
384# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
385 if (!pReNative->Core.offPc)
386 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
387# endif
388
389 /* Allocate a temporary PC register. */
390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
391
392 /* Perform the addition and store the result. */
393 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
394 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395
396 /* Free but don't flush the PC register. */
397 iemNativeRegFreeTmp(pReNative, idxPcReg);
398#endif
399
400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
401 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
402
403 pReNative->Core.offPc += cbInstr;
404# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
405 off = iemNativePcAdjustCheck(pReNative, off);
406# endif
407 if (pReNative->cCondDepth)
408 off = iemNativeEmitPcWriteback(pReNative, off);
409 else
410 pReNative->Core.cInstrPcUpdateSkipped++;
411#endif
412
413 return off;
414}
415
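/*
 * Worked illustration of the delayed PC updating path above: three straight
 * line instructions of 3, 2 and 5 bytes only accumulate the advance, and a
 * later flush stores it with a single update:
 *
 *      cbInstr:                    3        2        5
 *      Core.offPc:                 3   ->   5   ->  10      (no rip store emitted)
 *      Core.cInstrPcUpdateSkipped: 1   ->   2   ->   3
 *      // a later iemNativeEmitPcWriteback() (e.g. from a pending-writes flush)
 *      // then emits a single "rip += 10" style update.
 */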
416
417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
418 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
419 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
420
421#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
422 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
424 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
425
426/** Same as iemRegAddToEip32AndFinishingNoFlags. */
427DECL_INLINE_THROW(uint32_t)
428iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
429{
430#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
431# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
432 if (!pReNative->Core.offPc)
433 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
434# endif
435
436 /* Allocate a temporary PC register. */
437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
438
439 /* Perform the addition and store the result. */
440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
442
443 /* Free but don't flush the PC register. */
444 iemNativeRegFreeTmp(pReNative, idxPcReg);
445#endif
446
447#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
448 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
449
450 pReNative->Core.offPc += cbInstr;
451# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
452 off = iemNativePcAdjustCheck(pReNative, off);
453# endif
454 if (pReNative->cCondDepth)
455 off = iemNativeEmitPcWriteback(pReNative, off);
456 else
457 pReNative->Core.cInstrPcUpdateSkipped++;
458#endif
459
460 return off;
461}
462
463
464#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
465 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
466 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
467
468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
469 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
470 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
471 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
472
473/** Same as iemRegAddToIp16AndFinishingNoFlags. */
474DECL_INLINE_THROW(uint32_t)
475iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
476{
477#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
478# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
479 if (!pReNative->Core.offPc)
480 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
481# endif
482
483 /* Allocate a temporary PC register. */
484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
485
486 /* Perform the addition and store the result. */
487 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
488 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
489 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
490
491 /* Free but don't flush the PC register. */
492 iemNativeRegFreeTmp(pReNative, idxPcReg);
493#endif
494
495#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
496 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
497
498 pReNative->Core.offPc += cbInstr;
499# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
500 off = iemNativePcAdjustCheck(pReNative, off);
501# endif
502 if (pReNative->cCondDepth)
503 off = iemNativeEmitPcWriteback(pReNative, off);
504 else
505 pReNative->Core.cInstrPcUpdateSkipped++;
506#endif
507
508 return off;
509}
510
511
512
513/*********************************************************************************************************************************
514* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
515*********************************************************************************************************************************/
516
517#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
518 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
519 (a_enmEffOpSize), pCallEntry->idxInstr); \
520 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
521
522#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
523 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
524 (a_enmEffOpSize), pCallEntry->idxInstr); \
525 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
526 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
527
528#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
529 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
530 IEMMODE_16BIT, pCallEntry->idxInstr); \
531 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
532
533#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
534 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
535 IEMMODE_16BIT, pCallEntry->idxInstr); \
536 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
537 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
538
539#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
540 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
541 IEMMODE_64BIT, pCallEntry->idxInstr); \
542 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
543
544#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
545 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
546 IEMMODE_64BIT, pCallEntry->idxInstr); \
547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
548 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
549
550/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
551 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
552 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
553DECL_INLINE_THROW(uint32_t)
554iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
555 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
556{
557 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
558
559 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
560 off = iemNativeRegFlushPendingWrites(pReNative, off);
561
562#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
563 Assert(pReNative->Core.offPc == 0);
564
565 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
566#endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition. */
572 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
573
574 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
575 {
576 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
578 }
579 else
580 {
581 /* Just truncate the result to 16-bit IP. */
582 Assert(enmEffOpSize == IEMMODE_16BIT);
583 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
584 }
585 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
586
587 /* Free but don't flush the PC register. */
588 iemNativeRegFreeTmp(pReNative, idxPcReg);
589
590 return off;
591}
592
593
594#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
595 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
596 (a_enmEffOpSize), pCallEntry->idxInstr); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
598
599#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
600 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
601 (a_enmEffOpSize), pCallEntry->idxInstr); \
602 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
603 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
604
605#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
606 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
607 IEMMODE_16BIT, pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
609
610#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
611 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
612 IEMMODE_16BIT, pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
615
616#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
618 IEMMODE_32BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
620
621#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
623 IEMMODE_32BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
626
627/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
628 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
629 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
630DECL_INLINE_THROW(uint32_t)
631iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
632 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
633{
634 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
635
636 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
637 off = iemNativeRegFlushPendingWrites(pReNative, off);
638
639#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
640 Assert(pReNative->Core.offPc == 0);
641
642 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
643#endif
644
645 /* Allocate a temporary PC register. */
646 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
647
648 /* Perform the addition. */
649 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
650
651 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
652 if (enmEffOpSize == IEMMODE_16BIT)
653 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
654
655 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
656/** @todo we can skip this in 32-bit FLAT mode. */
657 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
658
659 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
660
661 /* Free but don't flush the PC register. */
662 iemNativeRegFreeTmp(pReNative, idxPcReg);
663
664 return off;
665}
666
667
668#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
669 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
671
672#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
673 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
674 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
675 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
676
677#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
678 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
680
681#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
682 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
683 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
684 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
685
686#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
687 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
689
690#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
691 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
692 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
693 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
694
695/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
696DECL_INLINE_THROW(uint32_t)
697iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
698 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
699{
700 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
701 off = iemNativeRegFlushPendingWrites(pReNative, off);
702
703#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
704 Assert(pReNative->Core.offPc == 0);
705
706 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
707#endif
708
709 /* Allocate a temporary PC register. */
710 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
711
712 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
713 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
714 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
715 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
716 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
717
718 /* Free but don't flush the PC register. */
719 iemNativeRegFreeTmp(pReNative, idxPcReg);
720
721 return off;
722}
723
724
725
726/*********************************************************************************************************************************
727* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
728*********************************************************************************************************************************/
729
730/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
731#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
732 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
733
734/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
735#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
736 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
737
738/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
739#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
740 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
741
742/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
743 * clears flags. */
744#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
745 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
746 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
747
748/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
749 * clears flags. */
750#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
751 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
752 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
753
754/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
755 * clears flags. */
756#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
757 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
758 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
759
760#undef IEM_MC_SET_RIP_U16_AND_FINISH
761
762
763/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
764#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
765 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
766
767/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
768#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
769 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
770
771/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
772 * clears flags. */
773#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
774 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
775 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
776
777/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
778 * and clears flags. */
779#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
780 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
781 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
782
783#undef IEM_MC_SET_RIP_U32_AND_FINISH
784
785
786/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
787#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
788 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
789
790/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
791 * and clears flags. */
792#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
793 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
794 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
795
796#undef IEM_MC_SET_RIP_U64_AND_FINISH
797
798
799/** Same as iemRegRipJumpU16AndFinishNoFlags,
800 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
801DECL_INLINE_THROW(uint32_t)
802iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
803 uint8_t idxInstr, uint8_t cbVar)
804{
805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
807
808 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
809 off = iemNativeRegFlushPendingWrites(pReNative, off);
810
811#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
812 Assert(pReNative->Core.offPc == 0);
813
814 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
815#endif
816
817 /* Get a register with the new PC loaded from idxVarPc.
818 Note! This ASSUMES that the high bits of the GPR are zeroed. */
819 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
820
821 /* Check limit (may #GP(0) + exit TB). */
822 if (!f64Bit)
823/** @todo we can skip this test in FLAT 32-bit mode. */
824 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
825 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
826 else if (cbVar > sizeof(uint32_t))
827 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
828
829 /* Store the result. */
830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
831
832 iemNativeVarRegisterRelease(pReNative, idxVarPc);
833 /** @todo implicitly free the variable? */
834
835 return off;
836}
837
838
839
840/*********************************************************************************************************************************
841* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
842*********************************************************************************************************************************/
843
844#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
845 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
846
847/**
848 * Emits code to check if a \#NM exception should be raised.
849 *
850 * @returns New code buffer offset, UINT32_MAX on failure.
851 * @param pReNative The native recompile state.
852 * @param off The code buffer offset.
853 * @param idxInstr The current instruction.
854 */
855DECL_INLINE_THROW(uint32_t)
856iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
857{
858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
859 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
860
861 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
862 {
863#endif
864 /*
865 * Make sure we don't have any outstanding guest register writes as we may
866 * raise an #NM and all guest registers must be up to date in CPUMCTX.
867 */
868 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
869 off = iemNativeRegFlushPendingWrites(pReNative, off);
870
871#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
872 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
873#else
874 RT_NOREF(idxInstr);
875#endif
876
877 /* Allocate a temporary CR0 register. */
878 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
879 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
880
881 /*
882 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
883 * return raisexcpt();
884 */
885 /* Test and jump. */
886 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
887
888 /* Free but don't flush the CR0 register. */
889 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
890
891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
893 }
894 else
895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
896#endif
897
898 return off;
899}
900
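/*
 * The native check above mirrors this guest state test (sketch only; the
 * interpreter-side helper name is an assumption):
 *
 *      if (pVCpu->cpum.GstCtx.cr0 & (X86_CR0_EM | X86_CR0_TS))
 *          return iemRaiseDeviceNotAvailable(pVCpu); // #NM
 */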
901
902#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
903 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
904
905/**
906 * Emits code to check if a \#MF exception should be raised.
907 *
908 * @returns New code buffer offset, UINT32_MAX on failure.
909 * @param pReNative The native recompile state.
910 * @param off The code buffer offset.
911 * @param idxInstr The current instruction.
912 */
913DECL_INLINE_THROW(uint32_t)
914iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
915{
916 /*
917 * Make sure we don't have any outstanding guest register writes as we may
918 * raise an #MF and all guest registers must be up to date in CPUMCTX.
919 */
920 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
921 off = iemNativeRegFlushPendingWrites(pReNative, off);
922
923#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
924 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
925#else
926 RT_NOREF(idxInstr);
927#endif
928
929 /* Allocate a temporary FSW register. */
930 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
931 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
932
933 /*
934 * if ((FSW & X86_FSW_ES) != 0)
935 * return raisexcpt();
936 */
937 /* Test and jump. */
938 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
939
940 /* Free but don't flush the FSW register. */
941 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
942
943 return off;
944}
945
946
947#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
948 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
949
950/**
951 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
952 *
953 * @returns New code buffer offset, UINT32_MAX on failure.
954 * @param pReNative The native recompile state.
955 * @param off The code buffer offset.
956 * @param idxInstr The current instruction.
957 */
958DECL_INLINE_THROW(uint32_t)
959iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
960{
961#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
962 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
963
964 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
965 {
966#endif
967 /*
968 * Make sure we don't have any outstanding guest register writes as we may
969 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
970 */
971 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
972 off = iemNativeRegFlushPendingWrites(pReNative, off);
973
974#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
975 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
976#else
977 RT_NOREF(idxInstr);
978#endif
979
980 /* Allocate a temporary CR0 and CR4 register. */
981 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
982 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
983 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
984 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
985
986 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
987#ifdef RT_ARCH_AMD64
988 /*
989 * We do a modified test here:
990 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
991 * else { goto RaiseSseRelated; }
992 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
993 * all targets except the 386, which doesn't support SSE, so this should
994 * be a safe assumption.
995 */
996 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
997 //pCodeBuf[off++] = 0xcc;
998 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
999 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
1000 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
1001 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
1002 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
1003 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
1004
1005#elif defined(RT_ARCH_ARM64)
1006 /*
1007 * We do a modified test here:
1008 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
1009 * else { goto RaiseSseRelated; }
1010 */
1011 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
1012 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1013 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
1014 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
1015 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
1016 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1017 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1018 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
1019 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1020 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1021 idxLabelRaiseSseRelated);
1022
1023#else
1024# error "Port me!"
1025#endif
1026
1027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1028 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1029 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1030 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1031
1032#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1033 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1034 }
1035 else
1036 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1037#endif
1038
1039 return off;
1040}
1041
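/*
 * Worked example for the combined SSE test above, using the bit values
 * X86_CR0_EM=0x004, X86_CR0_TS=0x008 and X86_CR4_OSFXSR=0x200:
 *
 *      CR0.EM=0, CR0.TS=0, CR4.OSFXSR=1 (the likely case):
 *          tmp = (0x200 & cr4) | cr0  ->  0x200 | <unrelated CR0 bits>
 *          tmp &= 0x20c               ->  0x200
 *          tmp ^= 0x200               ->  0        => fall through, no exception.
 *      CR4.OSFXSR=0:  tmp masks down to 0x000, xor gives 0x200 => RaiseSseRelated (#UD).
 *      CR0.TS=1:      tmp masks down to 0x208, xor gives 0x008 => RaiseSseRelated (#NM).
 */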
1042
1043#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1044 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1045
1046/**
1047 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1048 *
1049 * @returns New code buffer offset, UINT32_MAX on failure.
1050 * @param pReNative The native recompile state.
1051 * @param off The code buffer offset.
1052 * @param idxInstr The current instruction.
1053 */
1054DECL_INLINE_THROW(uint32_t)
1055iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1056{
1057#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1058 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1059
1060 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1061 {
1062#endif
1063 /*
1064 * Make sure we don't have any outstanding guest register writes as we may
1065 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1066 */
1067 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1068 off = iemNativeRegFlushPendingWrites(pReNative, off);
1069
1070#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1071 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1072#else
1073 RT_NOREF(idxInstr);
1074#endif
1075
1076 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1077 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1078 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1079 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1080 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1081 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1082
1083 /*
1084 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1085 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1086 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1087 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1088 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1089 * { likely }
1090 * else { goto RaiseAvxRelated; }
1091 */
1092#ifdef RT_ARCH_AMD64
1093 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1094 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1095 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1096 ^ 0x1a) ) { likely }
1097 else { goto RaiseAvxRelated; } */
1098 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1099 //pCodeBuf[off++] = 0xcc;
1100 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1101 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1102 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1103 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1104 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1105 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1106 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1107 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1108 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1109 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1110 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1111
1112#elif defined(RT_ARCH_ARM64)
1113 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1114 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1115 else { goto RaiseAvxRelated; } */
1116 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1117 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1118 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1119 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1120 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1121 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1122 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1123 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1124 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1125 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1126 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1127 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1128 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1129 idxLabelRaiseAvxRelated);
1130
1131#else
1132# error "Port me!"
1133#endif
1134
1135 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1136 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1137 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1138 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1139#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1140 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1141 }
1142 else
1143 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1144#endif
1145
1146 return off;
1147}
1148
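/*
 * Worked example for the AMD64 bit packing above (XSAVE_C_SSE=0x2 and
 * XSAVE_C_YMM=0x4, so the expected packed value is ((4|2) << 2) | 2 = 0x1a):
 *
 *      XCR0.SSE=1, XCR0.YMM=1, CR4.OSXSAVE=1, CR0.TS=0 (the likely case):
 *          tmp = 6  ->  rcl w/ OSXSAVE=1  ->  0b1101  ->  rcl w/ TS=0  ->  0b11010 (0x1a)
 *          tmp ^ 0x1a = 0                                => fall through, no exception.
 *      CR0.TS=1:    final tmp = 0b11011, xor gives 0x01  => RaiseAvxRelated (#NM).
 *      XCR0.YMM=0:  tmp starts at 2, ends as 0b01010, xor gives 0x10 => RaiseAvxRelated (#UD).
 */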
1149
1150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1151#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1152 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
1153
1154/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1155DECL_INLINE_THROW(uint32_t)
1156iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1157{
1158 /*
1159 * Make sure we don't have any outstanding guest register writes as we may
1160 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1161 */
1162 off = iemNativeRegFlushPendingWrites(pReNative, off);
1163
1164#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1165 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1166#else
1167 RT_NOREF(idxInstr);
1168#endif
1169
1170 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1171 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1172 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1173
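    /* The sequence below computes mxcsr & X86_MXCSR_XCPT_FLAGS & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT),
       i.e. it isolates the exception flags whose corresponding mask bits are clear and branches to the
       RaiseSseAvxFpRelated code if any unmasked SIMD FP exception is pending. */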
1174 /* mov tmp, varmxcsr */
1175 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1176 /* tmp &= X86_MXCSR_XCPT_MASK */
1177 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1178 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1179 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1180 /* tmp = ~tmp */
1181 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1182 /* tmp &= mxcsr */
1183 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1184 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1185 idxLabelRaiseSseAvxFpRelated);
1186
1187 /* Free but don't flush the MXCSR register. */
1188 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1189 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1190
1191 return off;
1192}
1193#endif
1194
1195
1196#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1197 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1198
1199/**
1200 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
1201 *
1202 * @returns New code buffer offset, UINT32_MAX on failure.
1203 * @param pReNative The native recompile state.
1204 * @param off The code buffer offset.
1205 * @param idxInstr The current instruction.
1206 */
1207DECL_INLINE_THROW(uint32_t)
1208iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1209{
1210 /*
1211 * Make sure we don't have any outstanding guest register writes as we may
1212 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1213 */
1214 off = iemNativeRegFlushPendingWrites(pReNative, off);
1215
1216#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1217 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1218#else
1219 RT_NOREF(idxInstr);
1220#endif
1221
1222 /* Allocate a temporary CR4 register. */
1223 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1224 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1225 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1226
1227 /*
1228 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1229 * return raisexcpt();
1230 */
1231 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
1232
1233 /* raise \#UD exception unconditionally. */
1234 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
1235
1236 /* Free but don't flush the CR4 register. */
1237 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1238
1239 return off;
1240}
1241
1242
1243#define IEM_MC_RAISE_DIVIDE_ERROR() \
1244 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1245
1246/**
1247 * Emits code to raise a \#DE.
1248 *
1249 * @returns New code buffer offset, UINT32_MAX on failure.
1250 * @param pReNative The native recompile state.
1251 * @param off The code buffer offset.
1252 * @param idxInstr The current instruction.
1253 */
1254DECL_INLINE_THROW(uint32_t)
1255iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1256{
1257 /*
1258 * Make sure we don't have any outstanding guest register writes as we may
1259 * raise a \#DE and all guest registers must be up to date in CPUMCTX. */
1260 off = iemNativeRegFlushPendingWrites(pReNative, off);
1261
1262#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1263 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1264#else
1265 RT_NOREF(idxInstr);
1266#endif
1267
1268 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1269
1270 /* raise \#DE exception unconditionally. */
1271 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1272
1273 return off;
1274}
1275
1276
1277/*********************************************************************************************************************************
1278* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1279*********************************************************************************************************************************/
1280
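/*
 * Each IEM_MC_IF_XXX emitter pushes an entry onto the condition stack (iemNativeCondPushIf) with
 * an 'else' and an 'endif' label, emits a test that jumps to the 'else' label when the condition
 * is false, and then snapshots the register/variable state (iemNativeCondStartIfBlock).
 * IEM_MC_ELSE jumps to the 'endif' label, defines the 'else' label and restores the snapshot,
 * while IEM_MC_ENDIF reconciles the core state of the two branches and defines the 'endif' label.
 */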
1281/**
1282 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1283 *
1284 * @returns Pointer to the condition stack entry.
1285 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
1286 */
1287DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1288{
1289#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1290 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1291#endif
1292
1293 uint32_t const idxStack = pReNative->cCondDepth;
1294 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1295
1296 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1297 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1298
1299 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1300 pEntry->fInElse = false;
1301 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1302 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1303
1304 return pEntry;
1305}
1306
1307
1308/**
1309 * Start of the if-block, snapshotting the register and variable state.
1310 */
1311DECL_INLINE_THROW(void)
1312iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1313{
1314 Assert(offIfBlock != UINT32_MAX);
1315 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1316 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1317 Assert(!pEntry->fInElse);
1318
1319 /* Define the start of the IF block if requested or for disassembly purposes. */
1320 if (idxLabelIf != UINT32_MAX)
1321 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1322#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1323 else
1324 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1325#else
1326 RT_NOREF(offIfBlock);
1327#endif
1328
1329#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1330 Assert(pReNative->Core.offPc == 0);
1331#endif
1332
1333 /* Copy the initial state so we can restore it in the 'else' block. */
1334 pEntry->InitialState = pReNative->Core;
1335}
1336
1337
1338#define IEM_MC_ELSE() } while (0); \
1339 off = iemNativeEmitElse(pReNative, off); \
1340 do {
1341
1342/** Emits code related to IEM_MC_ELSE. */
1343DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1344{
1345 /* Check sanity and get the conditional stack entry. */
1346 Assert(off != UINT32_MAX);
1347 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1348 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1349 Assert(!pEntry->fInElse);
1350
1351#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1352 /* Writeback any dirty shadow registers. */
1353 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1354 * in one of the branches and leave guest registers already dirty before the start of the if
1355 * block alone. */
1356 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1357#endif
1358
1359 /* Jump to the endif */
1360 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1361
1362 /* Define the else label and enter the else part of the condition. */
1363 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1364 pEntry->fInElse = true;
1365
1366#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1367 Assert(pReNative->Core.offPc == 0);
1368#endif
1369
1370 /* Snapshot the core state so we can do a merge at the endif and restore
1371 the snapshot we took at the start of the if-block. */
1372 pEntry->IfFinalState = pReNative->Core;
1373 pReNative->Core = pEntry->InitialState;
1374
1375 return off;
1376}
1377
1378
1379#define IEM_MC_ENDIF() } while (0); \
1380 off = iemNativeEmitEndIf(pReNative, off)
1381
1382/** Emits code related to IEM_MC_ENDIF. */
1383DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1384{
1385 /* Check sanity and get the conditional stack entry. */
1386 Assert(off != UINT32_MAX);
1387 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1388 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1389
1390#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1391 Assert(pReNative->Core.offPc == 0);
1392#endif
1393#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1394 /* Writeback any dirty shadow registers (else branch). */
1395 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1396 * in one of the branches and leave guest registers already dirty before the start of the if
1397 * block alone. */
1398 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1399#endif
1400
1401 /*
1402 * Now we have to find common ground with the core state at the end of the
1403 * other branch. Use the smallest common denominator and just drop anything
1404 * that isn't the same in both states.
1405 */
1406 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1407 * which is why we're doing this at the end of the else-block.
1408 * But we'd need more info about the future for that to be worth the effort. */
1409 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1410#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1411 Assert( pOther->bmGstRegShadowDirty == 0
1412 && pReNative->Core.bmGstRegShadowDirty == 0);
1413#endif
1414
1415 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1416 {
1417 /* shadow guest stuff first. */
1418 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1419 if (fGstRegs)
1420 {
1421 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1422 do
1423 {
1424 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1425 fGstRegs &= ~RT_BIT_64(idxGstReg);
1426
1427 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1428 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1429 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1430 {
1431 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1432 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1433
1434#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1435 /* Writeback any dirty shadow registers we are about to unshadow. */
1436 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1437#endif
1438 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1439 }
1440 } while (fGstRegs);
1441 }
1442 else
1443 {
1444 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1445#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1446 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1447#endif
1448 }
1449
1450 /* Check variables next. For now we must require them to be identical
1451 or stuff we can recreate. */
1452 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1453 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1454 if (fVars)
1455 {
1456 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1457 do
1458 {
1459 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1460 fVars &= ~RT_BIT_32(idxVar);
1461
1462 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1463 {
1464 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1465 continue;
1466 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1467 {
1468 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1469 if (idxHstReg != UINT8_MAX)
1470 {
1471 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1472 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1473 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1474 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1475 }
1476 continue;
1477 }
1478 }
1479 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1480 continue;
1481
1482 /* Irreconcilable, so drop it. */
1483 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1484 if (idxHstReg != UINT8_MAX)
1485 {
1486 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1487 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1488 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1489 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1490 }
1491 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1492 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1493 } while (fVars);
1494 }
1495
1496 /* Finally, check that the host register allocations matches. */
1497 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1498 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1499 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1500 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1501 }
1502
1503 /*
1504 * Define the endif label and maybe the else one if we're still in the 'if' part.
1505 */
1506 if (!pEntry->fInElse)
1507 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1508 else
1509 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1510 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1511
1512 /* Pop the conditional stack. */
1513 pReNative->cCondDepth -= 1;
1514
1515 return off;
1516}
1517
1518
1519#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1520 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1521 do {
1522
1523/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1524DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1525{
1526 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1527 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1528
1529 /* Get the eflags. */
1530 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1531 kIemNativeGstRegUse_ReadOnly);
1532
1533 /* Test and jump. */
1534 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1535
1536 /* Free but don't flush the EFlags register. */
1537 iemNativeRegFreeTmp(pReNative, idxEflReg);
1538
1539 /* Make a copy of the core state now as we start the if-block. */
1540 iemNativeCondStartIfBlock(pReNative, off);
1541
1542 return off;
1543}
1544
1545
1546#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1547 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1548 do {
1549
1550/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1551DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1552{
1553 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1554 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1555
1556 /* Get the eflags. */
1557 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1558 kIemNativeGstRegUse_ReadOnly);
1559
1560 /* Test and jump. */
1561 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1562
1563 /* Free but don't flush the EFlags register. */
1564 iemNativeRegFreeTmp(pReNative, idxEflReg);
1565
1566 /* Make a copy of the core state now as we start the if-block. */
1567 iemNativeCondStartIfBlock(pReNative, off);
1568
1569 return off;
1570}
1571
1572
1573#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1574 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1575 do {
1576
1577/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1578DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1579{
1580 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1581 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1582
1583 /* Get the eflags. */
1584 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1585 kIemNativeGstRegUse_ReadOnly);
1586
1587 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1588 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1589
1590 /* Test and jump. */
1591 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1592
1593 /* Free but don't flush the EFlags register. */
1594 iemNativeRegFreeTmp(pReNative, idxEflReg);
1595
1596 /* Make a copy of the core state now as we start the if-block. */
1597 iemNativeCondStartIfBlock(pReNative, off);
1598
1599 return off;
1600}
1601
1602
1603#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1604 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1605 do {
1606
1607/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1608DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1609{
1610 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1611 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1612
1613 /* Get the eflags. */
1614 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1615 kIemNativeGstRegUse_ReadOnly);
1616
1617 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1618 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1619
1620 /* Test and jump. */
1621 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1622
1623 /* Free but don't flush the EFlags register. */
1624 iemNativeRegFreeTmp(pReNative, idxEflReg);
1625
1626 /* Make a copy of the core state now as we start the if-block. */
1627 iemNativeCondStartIfBlock(pReNative, off);
1628
1629 return off;
1630}
1631
1632
1633#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1634 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1635 do {
1636
1637#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1638 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1639 do {
1640
1641/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1642DECL_INLINE_THROW(uint32_t)
1643iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1644 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1645{
1646 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1647 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1648
1649 /* Get the eflags. */
1650 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1651 kIemNativeGstRegUse_ReadOnly);
1652
1653 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1654 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1655
1656 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1657 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1658 Assert(iBitNo1 != iBitNo2);
1659
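    /* Strategy on both hosts: isolate EFLAGS bit #1, shift the copy so it lines up with bit #2 and
       XOR it back into EFLAGS; bit #2 of the result is then set exactly when the two flags differ.
       For example, IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) corresponds to the signed 'less'
       condition (SF != OF). */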
1660#ifdef RT_ARCH_AMD64
1661 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1662
1663 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1664 if (iBitNo1 > iBitNo2)
1665 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1666 else
1667 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1668 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1669
1670#elif defined(RT_ARCH_ARM64)
1671 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1672 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1673
1674 /* and tmpreg, eflreg, #1<<iBitNo1 */
1675 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1676
1677 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1678 if (iBitNo1 > iBitNo2)
1679 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1680 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1681 else
1682 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1683 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1684
1685 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1686
1687#else
1688# error "Port me"
1689#endif
1690
1691 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1692 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1693 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1694
1695 /* Free but don't flush the EFlags and tmp registers. */
1696 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1697 iemNativeRegFreeTmp(pReNative, idxEflReg);
1698
1699 /* Make a copy of the core state now as we start the if-block. */
1700 iemNativeCondStartIfBlock(pReNative, off);
1701
1702 return off;
1703}
1704
1705
1706#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1707 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1708 do {
1709
1710#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1711 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1712 do {
1713
1714/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1715 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1716DECL_INLINE_THROW(uint32_t)
1717iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1718 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1719{
1720 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1721 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1722
1723 /* We need an if-block label for the inverted variant. */
1724 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1725 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1726
1727 /* Get the eflags. */
1728 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1729 kIemNativeGstRegUse_ReadOnly);
1730
1731 /* Translate the flag masks to bit numbers. */
1732 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1733 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1734
1735 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1736 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1737 Assert(iBitNo1 != iBitNo);
1738
1739 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1740 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1741 Assert(iBitNo2 != iBitNo);
1742 Assert(iBitNo2 != iBitNo1);
1743
1744#ifdef RT_ARCH_AMD64
1745 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1746#elif defined(RT_ARCH_ARM64)
1747 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1748#endif
1749
1750 /* Check for the lone bit first. */
1751 if (!fInverted)
1752 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1753 else
1754 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1755
1756 /* Then extract and compare the other two bits. */
1757#ifdef RT_ARCH_AMD64
1758 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1759 if (iBitNo1 > iBitNo2)
1760 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1761 else
1762 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1763 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1764
1765#elif defined(RT_ARCH_ARM64)
1766 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1767
1768 /* and tmpreg, eflreg, #1<<iBitNo1 */
1769 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1770
1771 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1772 if (iBitNo1 > iBitNo2)
1773 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1774 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1775 else
1776 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1777 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1778
1779 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1780
1781#else
1782# error "Port me"
1783#endif
1784
1785 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1786 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1787 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1788
1789 /* Free but don't flush the EFlags and tmp registers. */
1790 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1791 iemNativeRegFreeTmp(pReNative, idxEflReg);
1792
1793 /* Make a copy of the core state now as we start the if-block. */
1794 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1795
1796 return off;
1797}
1798
1799
1800#define IEM_MC_IF_CX_IS_NZ() \
1801 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1802 do {
1803
1804/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1805DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1806{
1807 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1808
1809 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1810 kIemNativeGstRegUse_ReadOnly);
1811 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1812 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1813
1814 iemNativeCondStartIfBlock(pReNative, off);
1815 return off;
1816}
1817
1818
1819#define IEM_MC_IF_ECX_IS_NZ() \
1820 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1821 do {
1822
1823#define IEM_MC_IF_RCX_IS_NZ() \
1824 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1825 do {
1826
1827/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1828DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1829{
1830 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1831
1832 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1833 kIemNativeGstRegUse_ReadOnly);
1834 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1835 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1836
1837 iemNativeCondStartIfBlock(pReNative, off);
1838 return off;
1839}
1840
1841
1842#define IEM_MC_IF_CX_IS_NOT_ONE() \
1843 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1844 do {
1845
1846/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1847DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1848{
1849 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1850
1851 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1852 kIemNativeGstRegUse_ReadOnly);
1853#ifdef RT_ARCH_AMD64
1854 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1855#else
1856 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1857 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1858 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1859#endif
1860 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1861
1862 iemNativeCondStartIfBlock(pReNative, off);
1863 return off;
1864}
1865
1866
1867#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1868 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1869 do {
1870
1871#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1872 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1873 do {
1874
1875/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1876DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1877{
1878 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1879
1880 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1881 kIemNativeGstRegUse_ReadOnly);
1882 if (f64Bit)
1883 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1884 else
1885 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1886 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1887
1888 iemNativeCondStartIfBlock(pReNative, off);
1889 return off;
1890}
1891
1892
1893#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1894 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1895 do {
1896
1897#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1898 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1899 do {
1900
1901/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1902 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1903DECL_INLINE_THROW(uint32_t)
1904iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1905{
1906 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1907 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1908
1909 /* We have to load both RCX and EFLAGS before we can start branching,
1910 otherwise we'll end up in the else-block with an inconsistent
1911 register allocator state.
1912 Doing EFLAGS first as it's more likely to be loaded, right? */
1913 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1914 kIemNativeGstRegUse_ReadOnly);
1915 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1916 kIemNativeGstRegUse_ReadOnly);
1917
1918 /** @todo we could reduce this to a single branch instruction by spending a
1919 * temporary register and some setnz stuff. Not sure if loops are
1920 * worth it. */
1921 /* Check CX. */
1922#ifdef RT_ARCH_AMD64
1923 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1924#else
1925 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1926 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1927 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1928#endif
1929
1930 /* Check the EFlags bit. */
1931 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1932 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1933 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1934 !fCheckIfSet /*fJmpIfSet*/);
1935
1936 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1937 iemNativeRegFreeTmp(pReNative, idxEflReg);
1938
1939 iemNativeCondStartIfBlock(pReNative, off);
1940 return off;
1941}
1942
1943
1944#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1945 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1946 do {
1947
1948#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1949 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1950 do {
1951
1952#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1953 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1954 do {
1955
1956#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1957 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1958 do {
1959
1960/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1961 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
1962 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
1963 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1964DECL_INLINE_THROW(uint32_t)
1965iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1966 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
1967{
1968 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1969 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1970
1971 /* We have to load both RCX and EFLAGS before we can start branching,
1972 otherwise we'll end up in the else-block with an inconsistent
1973 register allocator state.
1974 Doing EFLAGS first as it's more likely to be loaded, right? */
1975 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1976 kIemNativeGstRegUse_ReadOnly);
1977 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1978 kIemNativeGstRegUse_ReadOnly);
1979
1980 /** @todo we could reduce this to a single branch instruction by spending a
1981 * temporary register and some setnz stuff. Not sure if loops are
1982 * worth it. */
1983 /* Check RCX/ECX. */
1984 if (f64Bit)
1985 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1986 else
1987 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1988
1989 /* Check the EFlags bit. */
1990 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1991 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1992 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1993 !fCheckIfSet /*fJmpIfSet*/);
1994
1995 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1996 iemNativeRegFreeTmp(pReNative, idxEflReg);
1997
1998 iemNativeCondStartIfBlock(pReNative, off);
1999 return off;
2000}
2001
2002
2003#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
2004 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
2005 do {
2006
2007/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
2008DECL_INLINE_THROW(uint32_t)
2009iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
2010{
2011 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2012
2013 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
2014 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
2015 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2016 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2017
2018 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
2019
2020 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
2021
2022 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
2023
2024 iemNativeCondStartIfBlock(pReNative, off);
2025 return off;
2026}
2027
2028
2029#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
2030 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
2031 do {
2032
2033/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
2034DECL_INLINE_THROW(uint32_t)
2035iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
2036{
2037 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2038 Assert(iGReg < 16);
2039
2040 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2041 kIemNativeGstRegUse_ReadOnly);
2042
2043 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2044
2045 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2046
2047 iemNativeCondStartIfBlock(pReNative, off);
2048 return off;
2049}
2050
2051
2052#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2053
2054#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
2055 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
2056 do {
2057
2058/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
2059DECL_INLINE_THROW(uint32_t)
2060iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2061{
2062 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2063
2064 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
2065 kIemNativeGstRegUse_Calculation);
2066 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2067
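    /* Same bit dance as in iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt: compute
       mxcsr & X86_MXCSR_XCPT_FLAGS & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
       and enter the if-block when the result is non-zero, i.e. when an unmasked exception is pending. */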
2068 /* mov tmp0, mxcsr */
2069 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2070 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
2071 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
2072 /* mxcsr &= X86_MXCSR_XCPT_MASK */
2073 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
2074 /* mxcsr = ~mxcsr */
2075 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
2076 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
2077 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
2078 /* tmp0 &= mxcsr */
2079 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2080
2081 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
2082 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
2083 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2084
2085 iemNativeCondStartIfBlock(pReNative, off);
2086 return off;
2087}
2088
2089#endif
2090
2091
2092/*********************************************************************************************************************************
2093* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2094*********************************************************************************************************************************/
2095
2096#define IEM_MC_NOREF(a_Name) \
2097 RT_NOREF_PV(a_Name)
2098
2099#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2100 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2101
2102#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2103 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2104
2105#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2106 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2107
2108#define IEM_MC_LOCAL(a_Type, a_Name) \
2109 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2110
2111#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2112 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2113
2114#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2115 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
2116
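/* Illustrative (hypothetical) usage from a recompiled MC block, showing what the macros above
   expand to -- the variable names below are made up, the real users are the generated
   IEM_MC_BEGIN/IEM_MC_END blocks:
       IEM_MC_ARG(uint16_t, u16Src, 1);   -> uint8_t const u16Src  = iemNativeArgAlloc(pReNative, 1, sizeof(uint16_t));
       IEM_MC_LOCAL(uint32_t, uResult);   -> uint8_t const uResult = iemNativeVarAlloc(pReNative, sizeof(uint32_t));
   Each macro thus yields a uint8_t variable index which the other IEM_MC_XXX emitters take as input. */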
2117
2118/**
2119 * Sets the host register for @a idxVarRc to @a idxReg.
2120 *
2121 * The register must not be allocated. Any guest register shadowing will be
2122 * implicitly dropped by this call.
2123 *
2124 * The variable must not have any register associated with it (causes
2125 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2126 * implied.
2127 *
2128 * @returns idxReg
2129 * @param pReNative The recompiler state.
2130 * @param idxVar The variable.
2131 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2132 * @param off For recording in debug info.
2133 *
2134 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2135 */
2136DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2137{
2138 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2139 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2140 Assert(!pVar->fRegAcquired);
2141 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2142 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2143 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2144
2145 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2146 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2147
2148 iemNativeVarSetKindToStack(pReNative, idxVar);
2149 pVar->idxReg = idxReg;
2150
2151 return idxReg;
2152}
2153
2154
2155/**
2156 * A convenient helper function.
2157 */
2158DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2159 uint8_t idxReg, uint32_t *poff)
2160{
2161 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2162 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2163 return idxReg;
2164}
2165
2166
2167/**
2168 * This is called by IEM_MC_END() to clean up all variables.
2169 */
2170DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2171{
2172 uint32_t const bmVars = pReNative->Core.bmVars;
2173 if (bmVars != 0)
2174 iemNativeVarFreeAllSlow(pReNative, bmVars);
2175 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2176 Assert(pReNative->Core.bmStack == 0);
2177}
2178
2179
2180#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2181
2182/**
2183 * This is called by IEM_MC_FREE_LOCAL.
2184 */
2185DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2186{
2187 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2188 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2189 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2190}
2191
2192
2193#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2194
2195/**
2196 * This is called by IEM_MC_FREE_ARG.
2197 */
2198DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2199{
2200 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2201 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2202 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2203}
2204
2205
2206#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2207
2208/**
2209 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2210 */
2211DECL_INLINE_THROW(uint32_t)
2212iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2213{
2214 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2215 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2216 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2217 Assert( pVarDst->cbVar == sizeof(uint16_t)
2218 || pVarDst->cbVar == sizeof(uint32_t));
2219
2220 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2221 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2222 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2223 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2224 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2225
2226 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2227
2228 /*
2229 * Special case for immediates.
2230 */
2231 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2232 {
2233 switch (pVarDst->cbVar)
2234 {
2235 case sizeof(uint16_t):
2236 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2237 break;
2238 case sizeof(uint32_t):
2239 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2240 break;
2241 default: AssertFailed(); break;
2242 }
2243 }
2244 else
2245 {
2246 /*
2247 * The generic solution for now.
2248 */
2249 /** @todo optimize this by having the python script make sure the source
2250 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2251 * statement. Then we could just transfer the register assignments. */
2252 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2253 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2254 switch (pVarDst->cbVar)
2255 {
2256 case sizeof(uint16_t):
2257 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2258 break;
2259 case sizeof(uint32_t):
2260 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2261 break;
2262 default: AssertFailed(); break;
2263 }
2264 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2265 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2266 }
2267 return off;
2268}
2269
2270
2271
2272/*********************************************************************************************************************************
2273* Emitters for IEM_MC_CALL_CIMPL_XXX *
2274*********************************************************************************************************************************/
2275
2276/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2277DECL_INLINE_THROW(uint32_t)
2278iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2279 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2280
2281{
2282 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2283
2284#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2285 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2286 when a call clobbers any of the relevant control registers. */
2287# if 1
2288 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2289 {
2290 /* Likely as long as call+ret are done via cimpl. */
2291 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2292 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2293 }
2294 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2295 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2296 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2297 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2298 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2299 else
2300 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2301 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2302 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2303
2304# else
2305 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2306 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2307 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2308 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2309 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2310 || pfnCImpl == (uintptr_t)iemCImpl_callf
2311 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2312 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2313 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2314 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2315 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2316# endif
2317#endif
2318
2319 /*
2320 * Do all the call setup and cleanup.
2321 */
2322 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2323
2324 /*
2325 * Load the two or three hidden arguments.
2326 */
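    /* The hidden arguments are the VMCPU pointer and the instruction length (cbInstr).  In strict
       builds on Windows/AMD64, where VBOXSTRICTRC is returned via a hidden buffer, a pointer to an
       rcStrict slot in the stack frame is passed as an extra first argument as well, which is why
       IEM_CIMPL_HIDDEN_ARGS is two or three. */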
2327#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2328 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2329 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2330 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2331#else
2332 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2333 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2334#endif
2335
2336 /*
2337 * Make the call and check the return code.
2338 *
2339 * Shadow PC copies are always flushed here, other stuff depends on flags.
2340 * Segment and general purpose registers are explicitly flushed via the
2341 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2342 * macros.
2343 */
2344 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2345#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2346 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2347#endif
2348 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2349 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2350 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2351 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2352
2353 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2354}
2355
2356
2357#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2358 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2359
2360/** Emits code for IEM_MC_CALL_CIMPL_1. */
2361DECL_INLINE_THROW(uint32_t)
2362iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2363 uintptr_t pfnCImpl, uint8_t idxArg0)
2364{
2365 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2366 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2367}
2368
2369
2370#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2371 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2372
2373/** Emits code for IEM_MC_CALL_CIMPL_2. */
2374DECL_INLINE_THROW(uint32_t)
2375iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2376 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2377{
2378 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2379 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2380 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2381}
2382
2383
2384#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2385 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2386 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2387
2388/** Emits code for IEM_MC_CALL_CIMPL_3. */
2389DECL_INLINE_THROW(uint32_t)
2390iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2391 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2392{
2393 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2394 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2395 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2396 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2397}
2398
2399
2400#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2401 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2402 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2403
2404/** Emits code for IEM_MC_CALL_CIMPL_4. */
2405DECL_INLINE_THROW(uint32_t)
2406iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2407 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2408{
2409 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2410 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2411 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2412 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2413 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2414}
2415
2416
2417#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2418 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2419 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2420
2421/** Emits code for IEM_MC_CALL_CIMPL_5. */
2422DECL_INLINE_THROW(uint32_t)
2423iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2424 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2425{
2426 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2427 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2428 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2429 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2430 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2431 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2432}
2433
2434
2435/** Recompiler debugging: Flush guest register shadow copies. */
2436#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2437
2438
2439
2440/*********************************************************************************************************************************
2441* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2442*********************************************************************************************************************************/
2443
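/*
 * The AIMPL (assembly implementation) helpers can't throw and don't access CPUMCTX, so the
 * emitters below only need to marshal the already allocated argument variables into the host
 * calling convention registers via iemNativeEmitCallCommon(), make the call, and optionally
 * capture the return value (IEMNATIVE_CALL_RET_GREG) into the a_rc variable.
 */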
2444/**
2445 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2446 */
2447DECL_INLINE_THROW(uint32_t)
2448iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2449 uintptr_t pfnAImpl, uint8_t cArgs)
2450{
2451 if (idxVarRc != UINT8_MAX)
2452 {
2453 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2454 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2455 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2456 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2457 }
2458
2459 /*
2460 * Do all the call setup and cleanup.
2461 *
2462 * It is only required to flush pending guest register writes in call volatile registers, as
2463 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
2464 * access their parameters. The flushing of call volatile registers is always done by
2465 * iemNativeEmitCallCommon() no matter the fFlushPendingWrites parameter.
2466 */
2467 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
2468
2469 /*
2470 * Make the call and update the return code variable if we've got one.
2471 */
2472 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2473 if (idxVarRc != UINT8_MAX)
2474 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2475
2476 return off;
2477}
2478
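/*
 * Rough illustration of how the wrappers below feed into the common worker,
 * assuming a hypothetical two operand assembly helper iemAImpl_foo_u64 and
 * argument variables a0/a1 set up earlier in the IEM_MC block:
 *
 *      IEM_MC_CALL_AIMPL_2(rcVar, iemAImpl_foo_u64, a0, a1);
 *          -> off = iemNativeEmitCallAImpl2(pReNative, off, rcVar, (uintptr_t)iemAImpl_foo_u64, a0, a1);
 *          -> asserts that a0/a1 occupy argument slots 0 and 1, then calls
 *             iemNativeEmitCallAImplCommon() with cArgs=2, which sets up the
 *             call, emits it and copies IEMNATIVE_CALL_RET_GREG into rcVar.
 */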
2479
2480
2481#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2482 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2483
2484#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2485 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2486
2487/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2488DECL_INLINE_THROW(uint32_t)
2489iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2490{
2491 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2492}
2493
2494
2495#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2496 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2497
2498#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2499 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2500
2501/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2502DECL_INLINE_THROW(uint32_t)
2503iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2504{
2505 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2506 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2507}
2508
2509
2510#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2511 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2512
2513#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2514 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2515
2516/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2517DECL_INLINE_THROW(uint32_t)
2518iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2519 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2520{
2521 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2522 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2523 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2524}
2525
2526
2527#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2528 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2529
2530#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2531 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2532
2533/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2534DECL_INLINE_THROW(uint32_t)
2535iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2536 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2537{
2538 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2539 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2540 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2541 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2542}
2543
2544
2545#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2546 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2547
2548#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2549 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2550
2551/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2552DECL_INLINE_THROW(uint32_t)
2553iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2554 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2555{
2556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2557 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2558 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2559 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2560 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2561}
2562
2563
2564
2565/*********************************************************************************************************************************
2566* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2567*********************************************************************************************************************************/
2568
2569#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2570 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2571
2572#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2573 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2574
2575#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2576 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2577
2578#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2579 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2580
2581
2582/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2583 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2584DECL_INLINE_THROW(uint32_t)
2585iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2586{
2587 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2588 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2589 Assert(iGRegEx < 20);
2590
2591 /* Same discussion as in iemNativeEmitFetchGregU16 */
2592 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2593 kIemNativeGstRegUse_ReadOnly);
2594
2595 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2596 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2597
2598 /* The value is zero-extended to the full 64-bit host register width. */
2599 if (iGRegEx < 16)
2600 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2601 else
2602 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2603
2604 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2605 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2606 return off;
2607}
2608
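/*
 * Note on the extended register index used above: values 0..15 are the
 * regular GPRs, while 16..19 are assumed to encode the high byte registers
 * AH/CH/DH/BH on top of their base GPR (iGRegEx & 15).  So e.g. a fetch of
 * AH lands in the iemNativeEmitLoadGprFromGpr8Hi() branch, which copies
 * bits 15:8 of the guest xAX shadow, while a fetch of AL (iGRegEx=0) copies
 * bits 7:0 - both zero extended into the destination register.
 */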
2609
2610#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2611 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2612
2613#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2614 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2615
2616#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2617 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2618
2619/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2620DECL_INLINE_THROW(uint32_t)
2621iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2622{
2623 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2624 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2625 Assert(iGRegEx < 20);
2626
2627 /* Same discussion as in iemNativeEmitFetchGregU16 */
2628 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2629 kIemNativeGstRegUse_ReadOnly);
2630
2631 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2632 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2633
2634 if (iGRegEx < 16)
2635 {
2636 switch (cbSignExtended)
2637 {
2638 case sizeof(uint16_t):
2639 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2640 break;
2641 case sizeof(uint32_t):
2642 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2643 break;
2644 case sizeof(uint64_t):
2645 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2646 break;
2647 default: AssertFailed(); break;
2648 }
2649 }
2650 else
2651 {
2652 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2653 switch (cbSignExtended)
2654 {
2655 case sizeof(uint16_t):
2656 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2657 break;
2658 case sizeof(uint32_t):
2659 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2660 break;
2661 case sizeof(uint64_t):
2662 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2663 break;
2664 default: AssertFailed(); break;
2665 }
2666 }
2667
2668 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2669 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2670 return off;
2671}
2672
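/*
 * For the high byte registers the sign extension above is done in two steps:
 * the Gpr8Hi load first moves bits 15:8 (AH/CH/DH/BH) down into bits 7:0 of
 * the variable register, which is then sign extended from its own low byte.
 * E.g. fetching AH=0x80 into a 32-bit destination yields 0xffffff80.
 */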
2673
2674
2675#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2676 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2677
2678#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2679 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2680
2681#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2682 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2683
2684/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2685DECL_INLINE_THROW(uint32_t)
2686iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2687{
2688 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2689 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2690 Assert(iGReg < 16);
2691
2692 /*
2693 * We can either just load the low 16-bit of the GPR into a host register
2694 * for the variable, or we can do so via a shadow copy host register. The
2695 * latter will avoid having to reload it if it's being stored later, but
2696 * will waste a host register if it isn't touched again. Since we don't
2697 * know what's going to happen, we choose the latter for now.
2698 */
2699 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2700 kIemNativeGstRegUse_ReadOnly);
2701
2702 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2703 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2704 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2705 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2706
2707 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2708 return off;
2709}
2710
2711
2712#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2713 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2714
2715#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2716 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2717
2718/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2719DECL_INLINE_THROW(uint32_t)
2720iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2721{
2722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2723 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2724 Assert(iGReg < 16);
2725
2726 /*
2727 * We can either just load the low 16-bit of the GPR into a host register
2728 * for the variable, or we can do so via a shadow copy host register. The
2729 * latter will avoid having to reload it if it's being stored later, but
2730 * will waste a host register if it isn't touched again. Since we don't
2731 * know what's going to happen, we choose the latter for now.
2732 */
2733 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2734 kIemNativeGstRegUse_ReadOnly);
2735
2736 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2737 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2738 if (cbSignExtended == sizeof(uint32_t))
2739 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2740 else
2741 {
2742 Assert(cbSignExtended == sizeof(uint64_t));
2743 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2744 }
2745 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2746
2747 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2748 return off;
2749}
2750
2751
2752#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2753 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2754
2755#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2756 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2757
2758/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2759DECL_INLINE_THROW(uint32_t)
2760iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2761{
2762 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2763 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2764 Assert(iGReg < 16);
2765
2766 /*
2767 * We can either just load the low 32-bit of the GPR into a host register
2768 * for the variable, or we can do so via a shadow copy host register. The
2769 * latter will avoid having to reload it if it's being stored later, but
2770 * will waste a host register if it isn't touched again. Since we don't
2771 * know what's going to happen, we choose the latter for now.
2772 */
2773 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2774 kIemNativeGstRegUse_ReadOnly);
2775
2776 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2777 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2778 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2779 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2780
2781 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2782 return off;
2783}
2784
2785
2786#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2787 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2788
2789/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2790DECL_INLINE_THROW(uint32_t)
2791iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2792{
2793 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2794 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2795 Assert(iGReg < 16);
2796
2797 /*
2798 * We can either just load the low 32-bit of the GPR into a host register
2799 * for the variable, or we can do so via a shadow copy host register. The
2800 * latter will avoid having to reload it if it's being stored later, but
2801 * will waste a host register if it isn't touched again. Since we don't
2802 * know what going to happen, we choose the latter for now.
2803 */
2804 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2805 kIemNativeGstRegUse_ReadOnly);
2806
2807 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2808 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2809 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2810 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2811
2812 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2813 return off;
2814}
2815
2816
2817#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2818 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2819
2820#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2821 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2822
2823/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2824 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2825DECL_INLINE_THROW(uint32_t)
2826iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2827{
2828 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2829 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2830 Assert(iGReg < 16);
2831
2832 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2833 kIemNativeGstRegUse_ReadOnly);
2834
2835 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2836 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2837 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2838 /** @todo name the register a shadow one already? */
2839 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2840
2841 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2842 return off;
2843}
2844
2845
2846
2847/*********************************************************************************************************************************
2848* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2849*********************************************************************************************************************************/
2850
2851#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2852 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2853
2854/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2855DECL_INLINE_THROW(uint32_t)
2856iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2857{
2858 Assert(iGRegEx < 20);
2859 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2860 kIemNativeGstRegUse_ForUpdate);
2861#ifdef RT_ARCH_AMD64
2862 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2863
2864 /* To the lowest byte of the register: mov r8, imm8 */
2865 if (iGRegEx < 16)
2866 {
2867 if (idxGstTmpReg >= 8)
2868 pbCodeBuf[off++] = X86_OP_REX_B;
2869 else if (idxGstTmpReg >= 4)
2870 pbCodeBuf[off++] = X86_OP_REX;
2871 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2872 pbCodeBuf[off++] = u8Value;
2873 }
2874 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise we rotate. */
2875 else if (idxGstTmpReg < 4)
2876 {
2877 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2878 pbCodeBuf[off++] = u8Value;
2879 }
2880 else
2881 {
2882 /* ror reg64, 8 */
2883 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2884 pbCodeBuf[off++] = 0xc1;
2885 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2886 pbCodeBuf[off++] = 8;
2887
2888 /* mov reg8, imm8 */
2889 if (idxGstTmpReg >= 8)
2890 pbCodeBuf[off++] = X86_OP_REX_B;
2891 else if (idxGstTmpReg >= 4)
2892 pbCodeBuf[off++] = X86_OP_REX;
2893 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2894 pbCodeBuf[off++] = u8Value;
2895
2896 /* rol reg64, 8 */
2897 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2898 pbCodeBuf[off++] = 0xc1;
2899 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2900 pbCodeBuf[off++] = 8;
2901 }
2902
2903#elif defined(RT_ARCH_ARM64)
2904 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2905 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2906 if (iGRegEx < 16)
2907 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2908 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2909 else
2910 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2911 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2912 iemNativeRegFreeTmp(pReNative, idxImmReg);
2913
2914#else
2915# error "Port me!"
2916#endif
2917
2918 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2919
2920#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2921 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2922#endif
2923
2924 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2925 return off;
2926}
2927
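/*
 * The rotate fallback in the AMD64 path above is used when the high byte of
 * the guest register needs patching but the shadowing host register cannot
 * be addressed as ah/ch/dh/bh.  Assuming the shadow lives in, say, r10 and
 * the target is AH, the emitted sequence is roughly:
 *
 *      ror r10, 8          ; bits 15:8 rotate down into bits 7:0
 *      mov r10b, imm8      ; patch the byte
 *      rol r10, 8          ; rotate everything back into place
 *
 * leaving all other bits of the 64-bit shadow value unchanged.
 */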
2928
2929#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2930 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2931
2932/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2933DECL_INLINE_THROW(uint32_t)
2934iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2935{
2936 Assert(iGRegEx < 20);
2937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2938
2939 /*
2940 * If it's a constant value (unlikely) we treat this as an
2941 * IEM_MC_STORE_GREG_U8_CONST statement.
2942 */
2943 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2944 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2945 { /* likely */ }
2946 else
2947 {
2948 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2949 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2950 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2951 }
2952
2953 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2954 kIemNativeGstRegUse_ForUpdate);
2955 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2956
2957#ifdef RT_ARCH_AMD64
2958 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
2959 if (iGRegEx < 16)
2960 {
2961 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2962 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2963 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2964 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2965 pbCodeBuf[off++] = X86_OP_REX;
2966 pbCodeBuf[off++] = 0x8a;
2967 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2968 }
2969 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise we rotate. */
2970 else if (idxGstTmpReg < 4 && idxVarReg < 4)
2971 {
2972 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
2973 pbCodeBuf[off++] = 0x8a;
2974 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
2975 }
2976 else
2977 {
2978 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
2979
2980 /* ror reg64, 8 */
2981 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2982 pbCodeBuf[off++] = 0xc1;
2983 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2984 pbCodeBuf[off++] = 8;
2985
2986 /* mov reg8, reg8(r/m) */
2987 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2988 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2989 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2990 pbCodeBuf[off++] = X86_OP_REX;
2991 pbCodeBuf[off++] = 0x8a;
2992 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2993
2994 /* rol reg64, 8 */
2995 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2996 pbCodeBuf[off++] = 0xc1;
2997 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2998 pbCodeBuf[off++] = 8;
2999 }
3000
3001#elif defined(RT_ARCH_ARM64)
3002 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
3003 or
3004 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
3005 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3006 if (iGRegEx < 16)
3007 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
3008 else
3009 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
3010
3011#else
3012# error "Port me!"
3013#endif
3014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3015
3016 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3017
3018#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3019 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
3020#endif
3021 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3022 return off;
3023}
3024
3025
3026
3027#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
3028 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
3029
3030/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
3031DECL_INLINE_THROW(uint32_t)
3032iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
3033{
3034 Assert(iGReg < 16);
3035 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3036 kIemNativeGstRegUse_ForUpdate);
3037#ifdef RT_ARCH_AMD64
3038 /* mov reg16, imm16 */
3039 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
3040 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3041 if (idxGstTmpReg >= 8)
3042 pbCodeBuf[off++] = X86_OP_REX_B;
3043 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
3044 pbCodeBuf[off++] = RT_BYTE1(uValue);
3045 pbCodeBuf[off++] = RT_BYTE2(uValue);
3046
3047#elif defined(RT_ARCH_ARM64)
3048 /* movk xdst, #uValue, lsl #0 */
3049 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3050 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3051
3052#else
3053# error "Port me!"
3054#endif
3055
3056 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3057
3058#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3059 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3060#endif
3061 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3062 return off;
3063}
3064
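/*
 * Worked example for the ARM64 path above: with the guest register shadow
 * holding 0x0000cafe12345678 and uValue=0xbeef, "movk xN, #0xbeef, lsl #0"
 * only replaces halfword 0, giving 0x0000cafe1234beef - i.e. the usual
 * 16-bit store semantics of leaving bits 63:16 untouched.
 */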
3065
3066#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3067 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3068
3069/** Emits code for IEM_MC_STORE_GREG_U16. */
3070DECL_INLINE_THROW(uint32_t)
3071iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3072{
3073 Assert(iGReg < 16);
3074 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3075
3076 /*
3077 * If it's a constant value (unlikely) we treat this as an
3078 * IEM_MC_STORE_GREG_U16_CONST statement.
3079 */
3080 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3081 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3082 { /* likely */ }
3083 else
3084 {
3085 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3086 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3087 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3088 }
3089
3090 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3091 kIemNativeGstRegUse_ForUpdate);
3092
3093#ifdef RT_ARCH_AMD64
3094 /* mov reg16, reg16 or [mem16] */
3095 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3096 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3097 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3098 {
3099 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3100 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3101 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3102 pbCodeBuf[off++] = 0x8b;
3103 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3104 }
3105 else
3106 {
3107 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3108 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3109 if (idxGstTmpReg >= 8)
3110 pbCodeBuf[off++] = X86_OP_REX_R;
3111 pbCodeBuf[off++] = 0x8b;
3112 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3113 }
3114
3115#elif defined(RT_ARCH_ARM64)
3116 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3117 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3118 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3119 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3120 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3121
3122#else
3123# error "Port me!"
3124#endif
3125
3126 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3127
3128#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3129 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3130#endif
3131 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3132 return off;
3133}
3134
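/*
 * Unlike most store emitters the AMD64 path above does not force the value
 * variable into a host register: if it only lives in its stack slot, the
 * 16-bit load is done straight from [rbp+disp].  Purely for illustration,
 * with the guest register shadow in rcx and the value in rax, the two shapes
 * come out roughly as:
 *
 *      66 8b c8            mov cx, ax          ; value already in a register
 *      66 8b 4d b8         mov cx, [rbp-48h]   ; value spilled to its stack slot
 *
 * (the displacement is of course whatever iemNativeStackCalcBpDisp returns).
 */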
3135
3136#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3137 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3138
3139/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3140DECL_INLINE_THROW(uint32_t)
3141iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3142{
3143 Assert(iGReg < 16);
3144 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3145 kIemNativeGstRegUse_ForFullWrite);
3146 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3147#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3148 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3149#endif
3150 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3151 return off;
3152}
3153
3154
3155#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3156 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3157
3158/** Emits code for IEM_MC_STORE_GREG_U32. */
3159DECL_INLINE_THROW(uint32_t)
3160iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3161{
3162 Assert(iGReg < 16);
3163 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3164
3165 /*
3166 * If it's a constant value (unlikely) we treat this as an
3167 * IEM_MC_STORE_GREG_U32_CONST statement.
3168 */
3169 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3170 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3171 { /* likely */ }
3172 else
3173 {
3174 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3175 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3176 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3177 }
3178
3179 /*
3180 * For the rest we allocate a guest register for the variable and write
3181 * it to the CPUMCTX structure.
3182 */
3183 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3184#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3185 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3186#else
3187 RT_NOREF(idxVarReg);
3188#endif
3189#ifdef VBOX_STRICT
3190 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3191#endif
3192 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3193 return off;
3194}
3195
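/*
 * No explicit zero extension is emitted above: a 32-bit GREG store implies
 * clearing bits 63:32 (x86-64 semantics), and the value variable is expected
 * to arrive with the upper half of its host register already zero - which is
 * exactly what the strict build verifies via iemNativeEmitTop32BitsClearCheck.
 */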
3196
3197#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3198 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3199
3200/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3201DECL_INLINE_THROW(uint32_t)
3202iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3203{
3204 Assert(iGReg < 16);
3205 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3206 kIemNativeGstRegUse_ForFullWrite);
3207 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3208#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3209 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3210#endif
3211 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3212 return off;
3213}
3214
3215
3216#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3217 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3218
3219#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3220 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3221
3222/** Emits code for IEM_MC_STORE_GREG_U64. */
3223DECL_INLINE_THROW(uint32_t)
3224iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3225{
3226 Assert(iGReg < 16);
3227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3228
3229 /*
3230 * If it's a constant value (unlikely) we treat this as an
3231 * IEM_MC_STORE_GREG_U64_CONST statement.
3232 */
3233 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3234 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3235 { /* likely */ }
3236 else
3237 {
3238 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3239 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3240 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3241 }
3242
3243 /*
3244 * For the rest we allocate a guest register for the variable and write
3245 * it to the CPUMCTX structure.
3246 */
3247 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3248#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3249 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3250#else
3251 RT_NOREF(idxVarReg);
3252#endif
3253 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3254 return off;
3255}
3256
3257
3258#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3259 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3260
3261/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3262DECL_INLINE_THROW(uint32_t)
3263iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3264{
3265 Assert(iGReg < 16);
3266 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3267 kIemNativeGstRegUse_ForUpdate);
3268 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3269#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3270 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3271#endif
3272 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3273 return off;
3274}
3275
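/*
 * The 32-bit self move above is sufficient because both host architectures
 * zero the upper half on a 32-bit register-to-register move, so e.g. a guest
 * register holding 0xdeadbeef00c0ffee ends up as 0x0000000000c0ffee before
 * being written back (or flushed later) to the guest context.
 */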
3276
3277/*********************************************************************************************************************************
3278* General purpose register manipulation (add, sub). *
3279*********************************************************************************************************************************/
3280
3281#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
3282    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
3283
3284/** Emits code for IEM_MC_ADD_GREG_U16. */
3285DECL_INLINE_THROW(uint32_t)
3286iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3287{
3288 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3289 kIemNativeGstRegUse_ForUpdate);
3290
3291#ifdef RT_ARCH_AMD64
3292 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3293 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3294 if (idxGstTmpReg >= 8)
3295 pbCodeBuf[off++] = X86_OP_REX_B;
3296 if (uAddend == 1)
3297 {
3298 pbCodeBuf[off++] = 0xff; /* inc */
3299 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3300 }
3301 else
3302 {
3303 pbCodeBuf[off++] = 0x81;
3304 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3305 pbCodeBuf[off++] = uAddend;
3306 pbCodeBuf[off++] = 0;
3307 }
3308
3309#else
3310 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3311 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3312
3313 /* add tmp, gstgrp, uAddend */
3314 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3315
3316 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3317 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3318
3319 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3320#endif
3321
3322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3323
3324#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3325 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3326#endif
3327
3328 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3329 return off;
3330}
3331
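/*
 * Worked example for the ARM64 path above, assuming AX wraps: with the guest
 * register holding 0x000012345678ffff and uAddend=1, the 32-bit add leaves
 * 0x56790000 in the temporary and the bfi merges only bits 15:0 back, giving
 * 0x0000123456780000 - bits 63:16 stay untouched, just like a real
 * "add ax, 1" would behave.
 */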
3332
3333#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3334 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3335
3336#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3337 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3338
3339/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3340DECL_INLINE_THROW(uint32_t)
3341iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3342{
3343 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3344 kIemNativeGstRegUse_ForUpdate);
3345
3346#ifdef RT_ARCH_AMD64
3347 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3348 if (f64Bit)
3349 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3350 else if (idxGstTmpReg >= 8)
3351 pbCodeBuf[off++] = X86_OP_REX_B;
3352 if (uAddend == 1)
3353 {
3354 pbCodeBuf[off++] = 0xff; /* inc */
3355 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3356 }
3357 else if (uAddend < 128)
3358 {
3359 pbCodeBuf[off++] = 0x83; /* add */
3360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3361 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3362 }
3363 else
3364 {
3365 pbCodeBuf[off++] = 0x81; /* add */
3366 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3367 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3368 pbCodeBuf[off++] = 0;
3369 pbCodeBuf[off++] = 0;
3370 pbCodeBuf[off++] = 0;
3371 }
3372
3373#else
3374 /* add gstgrp, gstgrp, uAddend */
3375 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3376 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3377
3378#endif
3379
3380 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3381
3382#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3383 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3384#endif
3385
3386 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3387 return off;
3388}
3389
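/*
 * The AMD64 path above simply picks the shortest usable encoding: "inc" for
 * an addend of one, the sign extended imm8 form (opcode 0x83) while the
 * addend fits into 7 bits, and the imm32 form (opcode 0x81) otherwise; since
 * uAddend is at most 255, the three zero bytes in the last case just pad the
 * immediate out to 32 bits.
 */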
3390
3391
3392#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3393 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3394
3395/** Emits code for IEM_MC_SUB_GREG_U16. */
3396DECL_INLINE_THROW(uint32_t)
3397iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3398{
3399 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3400 kIemNativeGstRegUse_ForUpdate);
3401
3402#ifdef RT_ARCH_AMD64
3403 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3404 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3405 if (idxGstTmpReg >= 8)
3406 pbCodeBuf[off++] = X86_OP_REX_B;
3407 if (uSubtrahend == 1)
3408 {
3409 pbCodeBuf[off++] = 0xff; /* dec */
3410 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3411 }
3412 else
3413 {
3414 pbCodeBuf[off++] = 0x81;
3415 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3416 pbCodeBuf[off++] = uSubtrahend;
3417 pbCodeBuf[off++] = 0;
3418 }
3419
3420#else
3421 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3422 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3423
3424 /* sub tmp, gstgrp, uSubtrahend */
3425 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3426
3427 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3428 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3429
3430 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3431#endif
3432
3433 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3434
3435#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3436 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3437#endif
3438
3439 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3440 return off;
3441}
3442
3443
3444#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3445 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3446
3447#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3448 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3449
3450/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3451DECL_INLINE_THROW(uint32_t)
3452iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3453{
3454 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3455 kIemNativeGstRegUse_ForUpdate);
3456
3457#ifdef RT_ARCH_AMD64
3458 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3459 if (f64Bit)
3460 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3461 else if (idxGstTmpReg >= 8)
3462 pbCodeBuf[off++] = X86_OP_REX_B;
3463 if (uSubtrahend == 1)
3464 {
3465 pbCodeBuf[off++] = 0xff; /* dec */
3466 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3467 }
3468 else if (uSubtrahend < 128)
3469 {
3470 pbCodeBuf[off++] = 0x83; /* sub */
3471 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3472 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3473 }
3474 else
3475 {
3476 pbCodeBuf[off++] = 0x81; /* sub */
3477 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3478 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3479 pbCodeBuf[off++] = 0;
3480 pbCodeBuf[off++] = 0;
3481 pbCodeBuf[off++] = 0;
3482 }
3483
3484#else
3485 /* sub gstgrp, gstgrp, uSubtrahend */
3486 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3487 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3488
3489#endif
3490
3491 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3492
3493#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3494 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3495#endif
3496
3497 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3498 return off;
3499}
3500
3501
3502#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3503 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3504
3505#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3506 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3507
3508#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3509 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3510
3511#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3512 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3513
3514/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3515DECL_INLINE_THROW(uint32_t)
3516iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3517{
3518#ifdef VBOX_STRICT
3519 switch (cbMask)
3520 {
3521 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3522 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3523 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3524 case sizeof(uint64_t): break;
3525 default: AssertFailedBreak();
3526 }
3527#endif
3528
3529 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3530 kIemNativeGstRegUse_ForUpdate);
3531
3532 switch (cbMask)
3533 {
3534 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3535 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3536 break;
3537 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3538 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3539 break;
3540 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3541 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3542 break;
3543 case sizeof(uint64_t):
3544 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3545 break;
3546 default: AssertFailedBreak();
3547 }
3548
3549 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3550
3551#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3552 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3553#endif
3554
3555 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3556 return off;
3557}
3558
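/*
 * Small worked example of the sub-32-bit masking above: IEM_MC_AND_GREG_U8
 * with a 0x0f mask becomes an AND with 0xffffffffffffff0f, so a guest
 * register holding 0x11223344556677ff ends up as 0x112233445566770f - only
 * AL is affected, matching the x86 rule that 8-bit and 16-bit operations
 * leave the remaining bits alone.
 */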
3559
3560#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3561 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3562
3563#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3564 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3565
3566#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3567 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3568
3569#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3570 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3571
3572/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3573DECL_INLINE_THROW(uint32_t)
3574iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3575{
3576#ifdef VBOX_STRICT
3577 switch (cbMask)
3578 {
3579 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3580 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3581 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3582 case sizeof(uint64_t): break;
3583 default: AssertFailedBreak();
3584 }
3585#endif
3586
3587 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3588 kIemNativeGstRegUse_ForUpdate);
3589
3590 switch (cbMask)
3591 {
3592 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3593 case sizeof(uint16_t):
3594 case sizeof(uint64_t):
3595 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3596 break;
3597 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3598 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3599 break;
3600 default: AssertFailedBreak();
3601 }
3602
3603 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3604
3605#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3606 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3607#endif
3608
3609 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3610 return off;
3611}
3612
3613
3614/*********************************************************************************************************************************
3615* Local/Argument variable manipulation (add, sub, and, or). *
3616*********************************************************************************************************************************/
3617
3618#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3619 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3620
3621#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3622 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3623
3624#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3625 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3626
3627#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3628 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3629
3630
3631#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3632 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3633
3634#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3635 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3636
3637#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3638 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3639
3640/** Emits code for AND'ing a local and a constant value. */
3641DECL_INLINE_THROW(uint32_t)
3642iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3643{
3644#ifdef VBOX_STRICT
3645 switch (cbMask)
3646 {
3647 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3648 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3649 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3650 case sizeof(uint64_t): break;
3651 default: AssertFailedBreak();
3652 }
3653#endif
3654
3655 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3656 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3657
3658 if (cbMask <= sizeof(uint32_t))
3659 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3660 else
3661 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3662
3663 iemNativeVarRegisterRelease(pReNative, idxVar);
3664 return off;
3665}
3666
3667
3668#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3669 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3670
3671#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3672 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3673
3674#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3675 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3676
3677#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3678 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3679
3680/** Emits code for OR'ing a local and a constant value. */
3681DECL_INLINE_THROW(uint32_t)
3682iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3683{
3684#ifdef VBOX_STRICT
3685 switch (cbMask)
3686 {
3687 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3688 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3689 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3690 case sizeof(uint64_t): break;
3691 default: AssertFailedBreak();
3692 }
3693#endif
3694
3695 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3696 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3697
3698 if (cbMask <= sizeof(uint32_t))
3699 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3700 else
3701 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3702
3703 iemNativeVarRegisterRelease(pReNative, idxVar);
3704 return off;
3705}
3706
3707
3708#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3709 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3710
3711#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3712 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3713
3714#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3715 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3716
3717/** Emits code for reversing the byte order in a local value. */
3718DECL_INLINE_THROW(uint32_t)
3719iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3720{
3721 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3723
3724 switch (cbLocal)
3725 {
3726 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3727 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3728 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3729 default: AssertFailedBreak();
3730 }
3731
3732 iemNativeVarRegisterRelease(pReNative, idxVar);
3733 return off;
3734}
3735
3736
3737#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3738 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3739
3740#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3741 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3742
3743#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3744 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3745
3746/** Emits code for shifting left a local value. */
3747DECL_INLINE_THROW(uint32_t)
3748iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3749{
3750#ifdef VBOX_STRICT
3751 switch (cbLocal)
3752 {
3753 case sizeof(uint8_t): Assert(cShift < 8); break;
3754 case sizeof(uint16_t): Assert(cShift < 16); break;
3755 case sizeof(uint32_t): Assert(cShift < 32); break;
3756 case sizeof(uint64_t): Assert(cShift < 64); break;
3757 default: AssertFailedBreak();
3758 }
3759#endif
3760
3761 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3762 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3763
3764 if (cbLocal <= sizeof(uint32_t))
3765 {
3766 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3767 if (cbLocal < sizeof(uint32_t))
3768 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3769 cbLocal == sizeof(uint16_t)
3770 ? UINT32_C(0xffff)
3771 : UINT32_C(0xff));
3772 }
3773 else
3774 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3775
3776 iemNativeVarRegisterRelease(pReNative, idxVar);
3777 return off;
3778}
3779
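/*
 * Example of why the extra AND is needed in the narrow cases: shifting a
 * 16-bit local holding 0x8001 left by 4 in a 32-bit host register yields
 * 0x00080010, and the AND with 0xffff truncates that back to 0x0010 so the
 * variable keeps behaving like a 16-bit value for subsequent users.
 */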
3780
3781#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3782 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3783
3784#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3785 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3786
3787#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3788 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3789
3790/** Emits code for arithmetically shifting right a local value. */
3791DECL_INLINE_THROW(uint32_t)
3792iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3793{
3794#ifdef VBOX_STRICT
3795 switch (cbLocal)
3796 {
3797 case sizeof(int8_t): Assert(cShift < 8); break;
3798 case sizeof(int16_t): Assert(cShift < 16); break;
3799 case sizeof(int32_t): Assert(cShift < 32); break;
3800 case sizeof(int64_t): Assert(cShift < 64); break;
3801 default: AssertFailedBreak();
3802 }
3803#endif
3804
3805 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3807
3808 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3809 if (cbLocal == sizeof(uint8_t))
3810 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3811 else if (cbLocal == sizeof(uint16_t))
3812 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3813
3814 if (cbLocal <= sizeof(uint32_t))
3815 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3816 else
3817 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3818
3819 iemNativeVarRegisterRelease(pReNative, idxVar);
3820 return off;
3821}
3822
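/*
 * Example for the 16-bit case: a local holding 0x8000 (-32768) is first sign
 * extended to 0xffff8000 in the host register, otherwise the 32-bit
 * arithmetic shift would treat it as a positive value; after shifting right
 * by 4 the register holds 0xfffff800, whose low 16 bits (0xf800 == -2048)
 * are the expected IEM_MC_SAR_LOCAL_S16 result.
 */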
3823
3824#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3825 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3826
3827#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3828 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3829
3830#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3831 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3832
3833/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3834DECL_INLINE_THROW(uint32_t)
3835iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3836{
3837 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3838 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3839 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3840 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3841
3842 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3843 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3844
3845 /* Need to sign extend the value. */
3846 if (cbLocal <= sizeof(uint32_t))
3847 {
3848/** @todo ARM64: In case of boredom, the extended add instruction can do the
3849 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3850 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3851
3852 switch (cbLocal)
3853 {
3854 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3855 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3856 default: AssertFailed();
3857 }
3858
3859 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3860 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3861 }
3862 else
3863 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3864
3865 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3866 iemNativeVarRegisterRelease(pReNative, idxVar);
3867 return off;
3868}
3869
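/*
 * Illustration only, not part of the recompiler: what the code emitted for
 * IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR computes, i.e. sign-extend the local into a
 * temporary and add it to the 64-bit effective address. The helper name is
 * made up for the example.
 *
 * @code
 *  #include <stdint.h>
 *  #include <assert.h>
 *
 *  static uint64_t AddLocalS16ToEffAddr(uint64_t uEffAddr, int16_t i16Local)
 *  {
 *      int64_t const iTmp = i16Local;       // iemNativeEmitLoadGprSignExtendedFromGpr16
 *      return uEffAddr + (uint64_t)iTmp;    // iemNativeEmitAddTwoGprs
 *  }
 *
 *  int main(void)
 *  {
 *      assert(AddLocalS16ToEffAddr(UINT64_C(0x1000), -16) == UINT64_C(0xff0));
 *      return 0;
 *  }
 * @endcode
 */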
3870
3871
3872/*********************************************************************************************************************************
3873* EFLAGS *
3874*********************************************************************************************************************************/
3875
3876#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3877# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3878#else
3879# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3880 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3881
3882DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3883{
3884 if (fEflOutput)
3885 {
3886 PVMCPUCC const pVCpu = pReNative->pVCpu;
3887# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3888 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3889 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3890 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3891# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3892 if (fEflOutput & (a_fEfl)) \
3893 { \
3894 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3895 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3896 else \
3897 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3898 } else do { } while (0)
3899# else
3900 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3901 IEMLIVENESSBIT const LivenessClobbered =
3902 {
3903 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3904 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3905 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3906 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3907 };
3908 IEMLIVENESSBIT const LivenessDelayable =
3909 {
3910 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3911 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3912 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3913 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3914 };
3915# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3916 if (fEflOutput & (a_fEfl)) \
3917 { \
3918 if (LivenessClobbered.a_fLivenessMember) \
3919 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3920 else if (LivenessDelayable.a_fLivenessMember) \
3921 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3922 else \
3923 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3924 } else do { } while (0)
3925# endif
3926 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
3927 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
3928 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
3929 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
3930 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
3931 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
3932 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
3933# undef CHECK_FLAG_AND_UPDATE_STATS
3934 }
3935 RT_NOREF(fEflInput);
3936}
3937#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3938
3939#undef IEM_MC_FETCH_EFLAGS /* should not be used */
3940#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3941 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
3942
3943/** Handles IEM_MC_FETCH_EFLAGS_EX. */
3944DECL_INLINE_THROW(uint32_t)
3945iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
3946 uint32_t fEflInput, uint32_t fEflOutput)
3947{
3948 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
3949 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3950 RT_NOREF(fEflInput, fEflOutput);
3951
3952#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3953# ifdef VBOX_STRICT
3954 if ( pReNative->idxCurCall != 0
3955 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
3956 {
3957 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
3958 uint32_t const fBoth = fEflInput | fEflOutput;
3959# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
3960 AssertMsg( !(fBoth & (a_fElfConst)) \
3961 || (!(fEflInput & (a_fElfConst)) \
3962 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3963 : !(fEflOutput & (a_fElfConst)) \
3964 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3965 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
3966 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
3967 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
3968 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
3969 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
3970 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
3971 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
3972 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
3973 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
3974# undef ASSERT_ONE_EFL
3975 }
3976# endif
3977#endif
3978
3979 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3980
3981 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
3982 * the existing shadow copy. */
3983 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
3984 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3985 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
3986 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3987 return off;
3988}
3989
3990
3991
3992/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
3993 * start using it with custom native code emission (inlining assembly
3994 * instruction helpers). */
3995#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
3996#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3997 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3998 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
3999
4000#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
4001#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4002 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4003 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
4004
4005/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
4006DECL_INLINE_THROW(uint32_t)
4007iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
4008 bool fUpdateSkipping)
4009{
4010 RT_NOREF(fEflOutput);
4011 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
4012 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4013
4014#ifdef VBOX_STRICT
4015 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
4016 uint32_t offFixup = off;
4017 off = iemNativeEmitJnzToFixed(pReNative, off, off);
4018 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
4019 iemNativeFixupFixedJump(pReNative, offFixup, off);
4020
4021 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
4022 offFixup = off;
4023 off = iemNativeEmitJzToFixed(pReNative, off, off);
4024 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
4025 iemNativeFixupFixedJump(pReNative, offFixup, off);
4026
4027 /** @todo validate that only bits in the fEflOutput mask changed. */
4028#endif
4029
4030#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4031 if (fUpdateSkipping)
4032 {
4033 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4034 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4035 else
4036 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4037 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4038 }
4039#else
4040 RT_NOREF_PV(fUpdateSkipping);
4041#endif
4042
4043 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4044 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
4045 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4046 return off;
4047}
4048
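/*
 * Illustration only, not part of the recompiler: the two strict-build checks
 * emitted above, expressed as plain C. The masks here are a representative
 * subset made up for the example (bit 1 always reads as one; bits 3, 5 and 15
 * always read as zero); the authoritative X86_EFL_RA1_MASK / X86_EFL_RAZ_MASK
 * values live in iprt/x86.h.
 *
 * @code
 *  #include <stdint.h>
 *  #include <stdbool.h>
 *  #include <assert.h>
 *
 *  #define MY_EFL_RA1_MASK  UINT32_C(0x00000002)
 *  #define MY_EFL_RAZ_MASK  UINT32_C(0x00008028)
 *
 *  static bool IsEflagsSane(uint32_t fEfl)
 *  {
 *      if (!(fEfl & MY_EFL_RA1_MASK))
 *          return false;            // would hit the 0x2001 breakpoint above
 *      if (fEfl & MY_EFL_RAZ_MASK)
 *          return false;            // would hit the 0x2002 breakpoint above
 *      return true;
 *  }
 *
 *  int main(void)
 *  {
 *      assert( IsEflagsSane(UINT32_C(0x00000202)));  // typical value: IF set + reserved bit 1
 *      assert(!IsEflagsSane(UINT32_C(0x00000200)));  // reserved always-one bit missing
 *      return 0;
 *  }
 * @endcode
 */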
4049
4050
4051/*********************************************************************************************************************************
4052* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4053*********************************************************************************************************************************/
4054
4055#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4056 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4057
4058#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4059 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4060
4061#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4062 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4063
4064
4065/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4066 * IEM_MC_FETCH_SREG_ZX_U64. */
4067DECL_INLINE_THROW(uint32_t)
4068iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4069{
4070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4071 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4072 Assert(iSReg < X86_SREG_COUNT);
4073
4074 /*
4075 * For now, we will not create a shadow copy of a selector. The rationale
4076 * is that since we do not recompile the popping and loading of segment
4077 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
4078 * and moving to registers, there is only a small chance that the shadow
4079 * copy will be accessed again before the register is reloaded. One
4080 * scenario would be nested calls in 16-bit code, but I doubt it's worth
4081 * the extra register pressure atm.
4082 *
4083 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4084 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
4085 * store scenario covered at present (r160730).
4086 */
4087 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4088 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4089 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4090 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4091 return off;
4092}
4093
4094
4095
4096/*********************************************************************************************************************************
4097* Register references. *
4098*********************************************************************************************************************************/
4099
4100#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4101 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4102
4103#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4104 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4105
4106/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4107DECL_INLINE_THROW(uint32_t)
4108iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4109{
4110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4111 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4112 Assert(iGRegEx < 20);
4113
4114 if (iGRegEx < 16)
4115 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4116 else
4117 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4118
4119 /* If we've delayed writing back the register value, flush it now. */
4120 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4121
4122 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4123 if (!fConst)
4124 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4125
4126 return off;
4127}
4128
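/*
 * Illustration only, not part of the recompiler: how the threaded iGRegEx
 * encoding used above can be decoded. Values 0..15 reference (the low byte of)
 * GPR0..15, while 16..19 reference the high byte (AH/CH/DH/BH) of the first
 * four GPRs, which is why the code asserts iGRegEx < 20 and masks with 15. The
 * helper name is made up for the example.
 *
 * @code
 *  #include <stdint.h>
 *  #include <stdbool.h>
 *  #include <assert.h>
 *
 *  static void DecodeGRegEx(uint8_t iGRegEx, uint8_t *piGReg, bool *pfHighByte)
 *  {
 *      assert(iGRegEx < 20);
 *      *piGReg     = iGRegEx & 15;
 *      *pfHighByte = iGRegEx >= 16;
 *  }
 *
 *  int main(void)
 *  {
 *      uint8_t iGReg; bool fHigh;
 *      DecodeGRegEx(16, &iGReg, &fHigh);  // AH
 *      assert(iGReg == 0 && fHigh);
 *      DecodeGRegEx(3, &iGReg, &fHigh);   // BL
 *      assert(iGReg == 3 && !fHigh);
 *      return 0;
 *  }
 * @endcode
 */
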
4129#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4130 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4131
4132#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4133 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4134
4135#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4136 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4137
4138#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4139 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4140
4141#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4142 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4143
4144#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4145 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4146
4147#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4148 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4149
4150#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4151 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4152
4153#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4154 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4155
4156#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4157 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4158
4159/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4160DECL_INLINE_THROW(uint32_t)
4161iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4162{
4163 Assert(iGReg < 16);
4164 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4165 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4166
4167 /* If we've delayed writing back the register value, flush it now. */
4168 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4169
4170 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4171 if (!fConst)
4172 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4173
4174 return off;
4175}
4176
4177
4178#undef IEM_MC_REF_EFLAGS /* should not be used. */
4179#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4180 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4181 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4182
4183/** Handles IEM_MC_REF_EFLAGS. */
4184DECL_INLINE_THROW(uint32_t)
4185iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4186{
4187 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4188 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4189
4190#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4191 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4192
4193 /* Updating the skipping according to the outputs is a little early, but
4194 we don't have any other hooks for references atm. */
4195 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4196 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4197 else if (fEflOutput & X86_EFL_STATUS_BITS)
4198 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4199 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4200#else
4201 RT_NOREF(fEflInput, fEflOutput);
4202#endif
4203
4204 /* If we've delayed writing back the register value, flush it now. */
4205 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4206
4207 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4208 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4209
4210 return off;
4211}
4212
4213
4214/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4215 * different code from the threaded recompiler, maybe it would be helpful. For now
4216 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
4217#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4218
4219
4220#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4221 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4222
4223#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4224 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4225
4226#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4227 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4228
4229#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4230/* Just being paranoid here. */
4231# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
4232AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4233AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4234AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4235AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4236# endif
4237AssertCompileMemberOffset(X86XMMREG, au64, 0);
4238AssertCompileMemberOffset(X86XMMREG, au32, 0);
4239AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4240AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4241
4242# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4243 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4244# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4245 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4246# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4247 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4248# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4249 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4250#endif
4251
4252/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4253DECL_INLINE_THROW(uint32_t)
4254iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4255{
4256 Assert(iXReg < 16);
4257 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4258 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4259
4260 /* If we've delayed writing back the register value, flush it now. */
4261 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4262
4263#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4264 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4265 if (!fConst)
4266 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4267#else
4268 RT_NOREF(fConst);
4269#endif
4270
4271 return off;
4272}
4273
4274
4275#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
4276 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
4277
4278/** Handles IEM_MC_REF_MXCSR. */
4279DECL_INLINE_THROW(uint32_t)
4280iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
4281{
4282 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
4283 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4284
4285 /* If we've delayed writing back the register value, flush it now. */
4286 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
4287
4288 /* If there is a shadow copy of guest MXCSR, flush it now. */
4289 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
4290
4291 return off;
4292}
4293
4294
4295
4296/*********************************************************************************************************************************
4297* Effective Address Calculation *
4298*********************************************************************************************************************************/
4299#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4300 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4301
4302/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4303 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4304DECL_INLINE_THROW(uint32_t)
4305iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4306 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4307{
4308 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4309
4310 /*
4311 * Handle the disp16 form with no registers first.
4312 *
4313 * Convert to an immediate value, as that'll delay the register allocation
4314 * and assignment till the memory access / call / whatever and we can use
4315 * a more appropriate register (or none at all).
4316 */
4317 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4318 {
4319 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4320 return off;
4321 }
4322
4323 /* Determine the displacement. */
4324 uint16_t u16EffAddr;
4325 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4326 {
4327 case 0: u16EffAddr = 0; break;
4328 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4329 case 2: u16EffAddr = u16Disp; break;
4330 default: AssertFailedStmt(u16EffAddr = 0);
4331 }
4332
4333 /* Determine the registers involved. */
4334 uint8_t idxGstRegBase;
4335 uint8_t idxGstRegIndex;
4336 switch (bRm & X86_MODRM_RM_MASK)
4337 {
4338 case 0:
4339 idxGstRegBase = X86_GREG_xBX;
4340 idxGstRegIndex = X86_GREG_xSI;
4341 break;
4342 case 1:
4343 idxGstRegBase = X86_GREG_xBX;
4344 idxGstRegIndex = X86_GREG_xDI;
4345 break;
4346 case 2:
4347 idxGstRegBase = X86_GREG_xBP;
4348 idxGstRegIndex = X86_GREG_xSI;
4349 break;
4350 case 3:
4351 idxGstRegBase = X86_GREG_xBP;
4352 idxGstRegIndex = X86_GREG_xDI;
4353 break;
4354 case 4:
4355 idxGstRegBase = X86_GREG_xSI;
4356 idxGstRegIndex = UINT8_MAX;
4357 break;
4358 case 5:
4359 idxGstRegBase = X86_GREG_xDI;
4360 idxGstRegIndex = UINT8_MAX;
4361 break;
4362 case 6:
4363 idxGstRegBase = X86_GREG_xBP;
4364 idxGstRegIndex = UINT8_MAX;
4365 break;
4366#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4367 default:
4368#endif
4369 case 7:
4370 idxGstRegBase = X86_GREG_xBX;
4371 idxGstRegIndex = UINT8_MAX;
4372 break;
4373 }
4374
4375 /*
4376 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4377 */
4378 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4379 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4380 kIemNativeGstRegUse_ReadOnly);
4381 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4382 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4383 kIemNativeGstRegUse_ReadOnly)
4384 : UINT8_MAX;
4385#ifdef RT_ARCH_AMD64
4386 if (idxRegIndex == UINT8_MAX)
4387 {
4388 if (u16EffAddr == 0)
4389 {
4390 /* movzx ret, base */
4391 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4392 }
4393 else
4394 {
4395 /* lea ret32, [base64 + disp32] */
4396 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4397 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4398 if (idxRegRet >= 8 || idxRegBase >= 8)
4399 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4400 pbCodeBuf[off++] = 0x8d;
4401 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4402 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4403 else
4404 {
4405 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4406 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4407 }
4408 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4409 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4410 pbCodeBuf[off++] = 0;
4411 pbCodeBuf[off++] = 0;
4412 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4413
4414 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4415 }
4416 }
4417 else
4418 {
4419 /* lea ret32, [index64 + base64 (+ disp32)] */
4420 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4421 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4422 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4423 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4424 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4425 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4426 pbCodeBuf[off++] = 0x8d;
4427 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4428 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4429 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4430 if (bMod == X86_MOD_MEM4)
4431 {
4432 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4433 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4434 pbCodeBuf[off++] = 0;
4435 pbCodeBuf[off++] = 0;
4436 }
4437 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4438 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4439 }
4440
4441#elif defined(RT_ARCH_ARM64)
4442 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4443 if (u16EffAddr == 0)
4444 {
4445 if (idxRegIndex == UINT8_MAX)
4446 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4447 else
4448 {
4449 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4450 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4451 }
4452 }
4453 else
4454 {
4455 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4456 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4457 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4458 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4459 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4460 else
4461 {
4462 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4463 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4464 }
4465 if (idxRegIndex != UINT8_MAX)
4466 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4467 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4468 }
4469
4470#else
4471# error "port me"
4472#endif
4473
4474 if (idxRegIndex != UINT8_MAX)
4475 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4476 iemNativeRegFreeTmp(pReNative, idxRegBase);
4477 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4478 return off;
4479}
4480
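/*
 * Illustration only, not part of the recompiler: a plain-C version of the
 * 16-bit ModR/M effective address calculation that the emitter above produces
 * code for (see also iemOpHlpCalcRmEffAddrThreadedAddr16). The EXAMPLEREGS
 * structure and helper name are made up for the example.
 *
 * @code
 *  #include <stdint.h>
 *  #include <assert.h>
 *
 *  typedef struct EXAMPLEREGS { uint16_t bx, bp, si, di; } EXAMPLEREGS;
 *
 *  static uint16_t CalcEffAddr16(EXAMPLEREGS const *pRegs, uint8_t bRm, uint16_t u16Disp)
 *  {
 *      uint8_t const iMod = bRm >> 6;
 *      uint8_t const iRm  = bRm & 7;
 *      if (iMod == 0 && iRm == 6)                   // disp16 form, no registers
 *          return u16Disp;
 *
 *      uint16_t uAddr = iMod == 1 ? (uint16_t)(int16_t)(int8_t)u16Disp  // disp8, sign-extended
 *                     : iMod == 2 ? u16Disp                             // disp16
 *                     :             0;                                  // no displacement
 *      switch (iRm)
 *      {
 *          case 0:  uAddr += pRegs->bx + pRegs->si; break;
 *          case 1:  uAddr += pRegs->bx + pRegs->di; break;
 *          case 2:  uAddr += pRegs->bp + pRegs->si; break;
 *          case 3:  uAddr += pRegs->bp + pRegs->di; break;
 *          case 4:  uAddr += pRegs->si; break;
 *          case 5:  uAddr += pRegs->di; break;
 *          case 6:  uAddr += pRegs->bp; break;
 *          default: uAddr += pRegs->bx; break;
 *      }
 *      return uAddr;                                // wraps at 64 KiB like the emitted code
 *  }
 *
 *  int main(void)
 *  {
 *      EXAMPLEREGS const Regs = { 0x1000, 0x2000, 0x0010, 0x0020 };
 *      assert(CalcEffAddr16(&Regs, 0x40, 0xfe)   == 0x100e);  // mod=1 rm=0: bx + si - 2
 *      assert(CalcEffAddr16(&Regs, 0x06, 0x1234) == 0x1234);  // mod=0 rm=6: disp16 only
 *      return 0;
 *  }
 * @endcode
 */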
4481
4482#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4483 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4484
4485/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4486 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4487DECL_INLINE_THROW(uint32_t)
4488iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4489 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4490{
4491 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4492
4493 /*
4494 * Handle the disp32 form with no registers first.
4495 *
4496 * Convert to an immediate value, as that'll delay the register allocation
4497 * and assignment till the memory access / call / whatever and we can use
4498 * a more appropriate register (or none at all).
4499 */
4500 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4501 {
4502 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4503 return off;
4504 }
4505
4506 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
4507 uint32_t u32EffAddr = 0;
4508 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4509 {
4510 case 0: break;
4511 case 1: u32EffAddr = (int8_t)u32Disp; break;
4512 case 2: u32EffAddr = u32Disp; break;
4513 default: AssertFailed();
4514 }
4515
4516 /* Get the register (or SIB) value. */
4517 uint8_t idxGstRegBase = UINT8_MAX;
4518 uint8_t idxGstRegIndex = UINT8_MAX;
4519 uint8_t cShiftIndex = 0;
4520 switch (bRm & X86_MODRM_RM_MASK)
4521 {
4522 case 0: idxGstRegBase = X86_GREG_xAX; break;
4523 case 1: idxGstRegBase = X86_GREG_xCX; break;
4524 case 2: idxGstRegBase = X86_GREG_xDX; break;
4525 case 3: idxGstRegBase = X86_GREG_xBX; break;
4526 case 4: /* SIB */
4527 {
4528 /* index w/ scaling. */
4529 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4530 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4531 {
4532 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4533 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4534 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4535 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4536 case 4: cShiftIndex = 0; /*no index*/ break;
4537 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4538 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4539 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4540 }
4541
4542 /* base */
4543 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4544 {
4545 case 0: idxGstRegBase = X86_GREG_xAX; break;
4546 case 1: idxGstRegBase = X86_GREG_xCX; break;
4547 case 2: idxGstRegBase = X86_GREG_xDX; break;
4548 case 3: idxGstRegBase = X86_GREG_xBX; break;
4549 case 4:
4550 idxGstRegBase = X86_GREG_xSP;
4551 u32EffAddr += uSibAndRspOffset >> 8;
4552 break;
4553 case 5:
4554 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4555 idxGstRegBase = X86_GREG_xBP;
4556 else
4557 {
4558 Assert(u32EffAddr == 0);
4559 u32EffAddr = u32Disp;
4560 }
4561 break;
4562 case 6: idxGstRegBase = X86_GREG_xSI; break;
4563 case 7: idxGstRegBase = X86_GREG_xDI; break;
4564 }
4565 break;
4566 }
4567 case 5: idxGstRegBase = X86_GREG_xBP; break;
4568 case 6: idxGstRegBase = X86_GREG_xSI; break;
4569 case 7: idxGstRegBase = X86_GREG_xDI; break;
4570 }
4571
4572 /*
4573 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4574 * the start of the function.
4575 */
4576 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4577 {
4578 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4579 return off;
4580 }
4581
4582 /*
4583 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4584 */
4585 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4586 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4587 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4588 kIemNativeGstRegUse_ReadOnly);
4589 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4590 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4591 kIemNativeGstRegUse_ReadOnly);
4592
4593 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4594 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4595 {
4596 idxRegBase = idxRegIndex;
4597 idxRegIndex = UINT8_MAX;
4598 }
4599
4600#ifdef RT_ARCH_AMD64
4601 if (idxRegIndex == UINT8_MAX)
4602 {
4603 if (u32EffAddr == 0)
4604 {
4605 /* mov ret, base */
4606 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4607 }
4608 else
4609 {
4610 /* lea ret32, [base64 + disp32] */
4611 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4612 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4613 if (idxRegRet >= 8 || idxRegBase >= 8)
4614 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4615 pbCodeBuf[off++] = 0x8d;
4616 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4617 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4618 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4619 else
4620 {
4621 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4622 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4623 }
4624 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4625 if (bMod == X86_MOD_MEM4)
4626 {
4627 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4628 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4629 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4630 }
4631 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4632 }
4633 }
4634 else
4635 {
4636 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4637 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4638 if (idxRegBase == UINT8_MAX)
4639 {
4640 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4641 if (idxRegRet >= 8 || idxRegIndex >= 8)
4642 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4643 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4644 pbCodeBuf[off++] = 0x8d;
4645 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4646 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4647 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4648 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4649 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4650 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4651 }
4652 else
4653 {
4654 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4655 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4656 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4657 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4658 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4659 pbCodeBuf[off++] = 0x8d;
4660 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4661 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4662 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4663 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4664 if (bMod != X86_MOD_MEM0)
4665 {
4666 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4667 if (bMod == X86_MOD_MEM4)
4668 {
4669 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4670 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4671 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4672 }
4673 }
4674 }
4675 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4676 }
4677
4678#elif defined(RT_ARCH_ARM64)
4679 if (u32EffAddr == 0)
4680 {
4681 if (idxRegIndex == UINT8_MAX)
4682 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4683 else if (idxRegBase == UINT8_MAX)
4684 {
4685 if (cShiftIndex == 0)
4686 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4687 else
4688 {
4689 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4690 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4691 }
4692 }
4693 else
4694 {
4695 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4696 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4697 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4698 }
4699 }
4700 else
4701 {
4702 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4703 {
4704 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4705 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4706 }
4707 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4708 {
4709 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4710 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4711 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4712 }
4713 else
4714 {
4715 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4716 if (idxRegBase != UINT8_MAX)
4717 {
4718 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4719 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4720 }
4721 }
4722 if (idxRegIndex != UINT8_MAX)
4723 {
4724 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4725 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4726 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4727 }
4728 }
4729
4730#else
4731# error "port me"
4732#endif
4733
4734 if (idxRegIndex != UINT8_MAX)
4735 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4736 if (idxRegBase != UINT8_MAX)
4737 iemNativeRegFreeTmp(pReNative, idxRegBase);
4738 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4739 return off;
4740}
4741
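/*
 * Illustration only, not part of the recompiler: a plain-C sketch of the SIB
 * decoding performed above. auGRegs stands in for the 32-bit guest GPR values
 * (EAX..EDI); uSibAndRspOffset packs the SIB byte in bits 0..7 and the extra
 * xSP offset used for "pop [esp]" in bits 8..15 (see the parameter description
 * of the 64-bit variant below). All names are made up for the example.
 *
 * @code
 *  #include <stdint.h>
 *  #include <assert.h>
 *
 *  static uint32_t CalcSibAddr32(uint32_t const auGRegs[8], uint8_t bMod,
 *                                uint32_t uSibAndRspOffset, uint32_t u32Disp)
 *  {
 *      uint8_t const bSib   = (uint8_t)uSibAndRspOffset;
 *      uint8_t const cShift = bSib >> 6;
 *      uint8_t const iIndex = (bSib >> 3) & 7;
 *      uint8_t const iBase  = bSib & 7;
 *      uint32_t      uAddr  = bMod == 1 ? (uint32_t)(int32_t)(int8_t)u32Disp
 *                           : bMod == 2 ? u32Disp : 0;
 *      if (iIndex != 4)                    // index=4 means no index register
 *          uAddr += auGRegs[iIndex] << cShift;
 *      if (iBase == 5 && bMod == 0)        // base=5 with mod=0 means disp32, no base
 *          uAddr += u32Disp;
 *      else
 *      {
 *          uAddr += auGRegs[iBase];
 *          if (iBase == 4)                 // xSP base: add the pop [esp] adjustment
 *              uAddr += uSibAndRspOffset >> 8;
 *      }
 *      return uAddr;
 *  }
 *
 *  int main(void)
 *  {
 *      uint32_t const auGRegs[8] = { 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000, 0x8000 };
 *      // [eax + ecx*4 + 0x10]: SIB scale=2, index=1, base=0; mod=1 with disp8=0x10.
 *      assert(CalcSibAddr32(auGRegs, 1, (2 << 6) | (1 << 3) | 0, 0x10) == UINT32_C(0x9010));
 *      return 0;
 *  }
 * @endcode
 */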
4742
4743#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4744 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4745 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4746
4747#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4748 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4749 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4750
4751#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4752 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4753 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4754
4755/**
4756 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4757 *
4758 * @returns New code buffer offset.
4759 * @param pReNative The native recompiler state.
4760 * @param off The current code buffer offset.
4761 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4762 * bit 4 to REX.X. The two bits are part of the
4763 * REG sub-field, which isn't needed in this
4764 * function.
4765 * @param uSibAndRspOffset Two parts:
4766 * - The first 8 bits make up the SIB byte.
4767 * - The next 8 bits are the fixed RSP/ESP offset
4768 * in case of a pop [xSP].
4769 * @param u32Disp The displacement byte/word/dword, if any.
4770 * @param cbInstr The size of the fully decoded instruction. Used
4771 * for RIP relative addressing.
4772 * @param idxVarRet The result variable number.
4773 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4774 * when calculating the address.
4775 *
4776 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4777 */
4778DECL_INLINE_THROW(uint32_t)
4779iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4780 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4781{
4782 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4783
4784 /*
4785 * Special case the rip + disp32 form first.
4786 */
4787 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4788 {
4789#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4790 /* Need to take the current PC offset into account for the displacement; no need to flush here
4791 * as the PC is only read and no branching or helper calls are involved. */
4792 u32Disp += pReNative->Core.offPc;
4793#endif
4794
4795 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4796 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4797 kIemNativeGstRegUse_ReadOnly);
4798#ifdef RT_ARCH_AMD64
4799 if (f64Bit)
4800 {
4801 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4802 if ((int32_t)offFinalDisp == offFinalDisp)
4803 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4804 else
4805 {
4806 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4807 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4808 }
4809 }
4810 else
4811 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4812
4813#elif defined(RT_ARCH_ARM64)
4814 if (f64Bit)
4815 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4816 (int64_t)(int32_t)u32Disp + cbInstr);
4817 else
4818 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4819 (int32_t)u32Disp + cbInstr);
4820
4821#else
4822# error "Port me!"
4823#endif
4824 iemNativeRegFreeTmp(pReNative, idxRegPc);
4825 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4826 return off;
4827 }
4828
4829 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
4830 int64_t i64EffAddr = 0;
4831 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4832 {
4833 case 0: break;
4834 case 1: i64EffAddr = (int8_t)u32Disp; break;
4835 case 2: i64EffAddr = (int32_t)u32Disp; break;
4836 default: AssertFailed();
4837 }
4838
4839 /* Get the register (or SIB) value. */
4840 uint8_t idxGstRegBase = UINT8_MAX;
4841 uint8_t idxGstRegIndex = UINT8_MAX;
4842 uint8_t cShiftIndex = 0;
4843 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4844 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4845 else /* SIB: */
4846 {
4847 /* index w/ scaling. */
4848 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4849 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4850 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4851 if (idxGstRegIndex == 4)
4852 {
4853 /* no index */
4854 cShiftIndex = 0;
4855 idxGstRegIndex = UINT8_MAX;
4856 }
4857
4858 /* base */
4859 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4860 if (idxGstRegBase == 4)
4861 {
4862 /* pop [rsp] hack */
4863 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4864 }
4865 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4866 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4867 {
4868 /* mod=0 and base=5 -> disp32, no base reg. */
4869 Assert(i64EffAddr == 0);
4870 i64EffAddr = (int32_t)u32Disp;
4871 idxGstRegBase = UINT8_MAX;
4872 }
4873 }
4874
4875 /*
4876 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4877 * the start of the function.
4878 */
4879 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4880 {
4881 if (f64Bit)
4882 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4883 else
4884 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4885 return off;
4886 }
4887
4888 /*
4889 * Now emit code that calculates:
4890 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4891 * or if !f64Bit:
4892 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4893 */
4894 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4895 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4896 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4897 kIemNativeGstRegUse_ReadOnly);
4898 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4899 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4900 kIemNativeGstRegUse_ReadOnly);
4901
4902 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4903 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4904 {
4905 idxRegBase = idxRegIndex;
4906 idxRegIndex = UINT8_MAX;
4907 }
4908
4909#ifdef RT_ARCH_AMD64
4910 uint8_t bFinalAdj;
4911 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
4912 bFinalAdj = 0; /* likely */
4913 else
4914 {
4915 /* pop [rsp] with a problematic disp32 value. Split out the
4916 RSP offset and add it separately afterwards (bFinalAdj). */
4917 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
4918 Assert(idxGstRegBase == X86_GREG_xSP);
4919 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
4920 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
4921 Assert(bFinalAdj != 0);
4922 i64EffAddr -= bFinalAdj;
4923 Assert((int32_t)i64EffAddr == i64EffAddr);
4924 }
4925 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
4926//pReNative->pInstrBuf[off++] = 0xcc;
4927
4928 if (idxRegIndex == UINT8_MAX)
4929 {
4930 if (u32EffAddr == 0)
4931 {
4932 /* mov ret, base */
4933 if (f64Bit)
4934 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
4935 else
4936 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4937 }
4938 else
4939 {
4940 /* lea ret, [base + disp32] */
4941 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4942 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4943 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
4944 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4945 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4946 | (f64Bit ? X86_OP_REX_W : 0);
4947 pbCodeBuf[off++] = 0x8d;
4948 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4949 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4950 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4951 else
4952 {
4953 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4954 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4955 }
4956 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4957 if (bMod == X86_MOD_MEM4)
4958 {
4959 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4960 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4961 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4962 }
4963 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4964 }
4965 }
4966 else
4967 {
4968 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4969 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4970 if (idxRegBase == UINT8_MAX)
4971 {
4972 /* lea ret, [(index64 << cShiftIndex) + disp32] */
4973 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
4974 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4975 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4976 | (f64Bit ? X86_OP_REX_W : 0);
4977 pbCodeBuf[off++] = 0x8d;
4978 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4979 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4980 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4981 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4982 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4983 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4984 }
4985 else
4986 {
4987 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4988 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4989 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4990 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4991 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4992 | (f64Bit ? X86_OP_REX_W : 0);
4993 pbCodeBuf[off++] = 0x8d;
4994 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4995 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4996 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4997 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4998 if (bMod != X86_MOD_MEM0)
4999 {
5000 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5001 if (bMod == X86_MOD_MEM4)
5002 {
5003 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5004 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5005 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5006 }
5007 }
5008 }
5009 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5010 }
5011
5012 if (!bFinalAdj)
5013 { /* likely */ }
5014 else
5015 {
5016 Assert(f64Bit);
5017 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
5018 }
5019
5020#elif defined(RT_ARCH_ARM64)
5021 if (i64EffAddr == 0)
5022 {
5023 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5024 if (idxRegIndex == UINT8_MAX)
5025 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
5026 else if (idxRegBase != UINT8_MAX)
5027 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5028 f64Bit, false /*fSetFlags*/, cShiftIndex);
5029 else
5030 {
5031 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
5032 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
5033 }
5034 }
5035 else
5036 {
5037 if (f64Bit)
5038 { /* likely */ }
5039 else
5040 i64EffAddr = (int32_t)i64EffAddr;
5041
5042 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
5043 {
5044 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5045 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
5046 }
5047 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
5048 {
5049 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5050 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5051 }
5052 else
5053 {
5054 if (f64Bit)
5055 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5056 else
5057 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5058 if (idxRegBase != UINT8_MAX)
5059 {
5060 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5061 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5062 }
5063 }
5064 if (idxRegIndex != UINT8_MAX)
5065 {
5066 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5067 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5068 f64Bit, false /*fSetFlags*/, cShiftIndex);
5069 }
5070 }
5071
5072#else
5073# error "port me"
5074#endif
5075
5076 if (idxRegIndex != UINT8_MAX)
5077 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5078 if (idxRegBase != UINT8_MAX)
5079 iemNativeRegFreeTmp(pReNative, idxRegBase);
5080 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5081 return off;
5082}
5083
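/*
 * Illustration only, not part of the recompiler: the rip + disp32 special case
 * handled at the top of the function above, expressed as plain C. The
 * displacement is relative to the end of the instruction (PC + cbInstr), and a
 * 32-bit address size truncates the result. The helper name is made up.
 *
 * @code
 *  #include <stdint.h>
 *  #include <stdbool.h>
 *  #include <assert.h>
 *
 *  static uint64_t CalcRipRelAddr(uint64_t uPc, uint8_t cbInstr, uint32_t u32Disp, bool f64Bit)
 *  {
 *      uint64_t const uAddr = uPc + cbInstr + (uint64_t)(int64_t)(int32_t)u32Disp;
 *      return f64Bit ? uAddr : (uint32_t)uAddr;
 *  }
 *
 *  int main(void)
 *  {
 *      // A 7 byte instruction at 0x401000 with disp32 = -0x20 references 0x400fe7.
 *      assert(CalcRipRelAddr(UINT64_C(0x401000), 7, (uint32_t)-0x20, true) == UINT64_C(0x400fe7));
 *      return 0;
 *  }
 * @endcode
 */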
5084
5085/*********************************************************************************************************************************
5086* Memory fetches and stores common *
5087*********************************************************************************************************************************/
5088
5089typedef enum IEMNATIVEMITMEMOP
5090{
5091 kIemNativeEmitMemOp_Store = 0,
5092 kIemNativeEmitMemOp_Fetch,
5093 kIemNativeEmitMemOp_Fetch_Zx_U16,
5094 kIemNativeEmitMemOp_Fetch_Zx_U32,
5095 kIemNativeEmitMemOp_Fetch_Zx_U64,
5096 kIemNativeEmitMemOp_Fetch_Sx_U16,
5097 kIemNativeEmitMemOp_Fetch_Sx_U32,
5098 kIemNativeEmitMemOp_Fetch_Sx_U64
5099} IEMNATIVEMITMEMOP;
5100
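/*
 * Illustration only, not part of the recompiler: what the Fetch_Zx_* vs
 * Fetch_Sx_* members of the enum above mean for a byte load into a 64-bit
 * destination, expressed as plain C conversions. The helper names are made up.
 *
 * @code
 *  #include <stdint.h>
 *  #include <assert.h>
 *
 *  static uint64_t FetchU8_Zx_U64(uint8_t bValue) { return bValue; }
 *  static uint64_t FetchU8_Sx_U64(uint8_t bValue) { return (uint64_t)(int64_t)(int8_t)bValue; }
 *
 *  int main(void)
 *  {
 *      assert(FetchU8_Zx_U64(0x80) == UINT64_C(0x0000000000000080));
 *      assert(FetchU8_Sx_U64(0x80) == UINT64_C(0xffffffffffffff80));
 *      return 0;
 *  }
 * @endcode
 */
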
5101/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5102 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5103 * (with iSegReg = UINT8_MAX). */
5104DECL_INLINE_THROW(uint32_t)
5105iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5106 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5107 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5108{
5109 /*
5110 * Assert sanity.
5111 */
5112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5113 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5114 Assert( enmOp != kIemNativeEmitMemOp_Store
5115 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5116 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5118 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5119 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5120 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5121 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5122 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5123#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5124 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5125 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5126#else
5127 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5128#endif
5129 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5130#ifdef VBOX_STRICT
5131 if (iSegReg == UINT8_MAX)
5132 {
5133 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5134 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5135 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5136 switch (cbMem)
5137 {
5138 case 1:
5139 Assert( pfnFunction
5140 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5141 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5142 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5143 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5144 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5145 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5146 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5147 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5148 : UINT64_C(0xc000b000a0009000) ));
5149 break;
5150 case 2:
5151 Assert( pfnFunction
5152 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5153 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5154 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5155 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5156 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5157 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5158 : UINT64_C(0xc000b000a0009000) ));
5159 break;
5160 case 4:
5161 Assert( pfnFunction
5162 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5163 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5164 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5165 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5166 : UINT64_C(0xc000b000a0009000) ));
5167 break;
5168 case 8:
5169 Assert( pfnFunction
5170 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5171 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5172 : UINT64_C(0xc000b000a0009000) ));
5173 break;
5174#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5175 case sizeof(RTUINT128U):
5176 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5177 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5178 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5179 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5180 || ( enmOp == kIemNativeEmitMemOp_Store
5181 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5182 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5183 break;
5184 case sizeof(RTUINT256U):
5185 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5186 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5187 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5188 || ( enmOp == kIemNativeEmitMemOp_Store
5189 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5190 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5191 break;
5192#endif
5193 }
5194 }
5195 else
5196 {
5197 Assert(iSegReg < 6);
5198 switch (cbMem)
5199 {
5200 case 1:
5201 Assert( pfnFunction
5202 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5203 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5204 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5205 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5206 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5207 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5208 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5209 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5210 : UINT64_C(0xc000b000a0009000) ));
5211 break;
5212 case 2:
5213 Assert( pfnFunction
5214 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5215 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5216 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5217 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5218 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5219 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5220 : UINT64_C(0xc000b000a0009000) ));
5221 break;
5222 case 4:
5223 Assert( pfnFunction
5224 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5225 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5226 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5227 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5228 : UINT64_C(0xc000b000a0009000) ));
5229 break;
5230 case 8:
5231 Assert( pfnFunction
5232 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5233 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5234 : UINT64_C(0xc000b000a0009000) ));
5235 break;
5236#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5237 case sizeof(RTUINT128U):
5238 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5239 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5240 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5241 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5242 || ( enmOp == kIemNativeEmitMemOp_Store
5243 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5244 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5245 break;
5246 case sizeof(RTUINT256U):
5247 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5248 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5249 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5250 || ( enmOp == kIemNativeEmitMemOp_Store
5251 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5252 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5253 break;
5254#endif
5255 }
5256 }
5257#endif
5258
5259#ifdef VBOX_STRICT
5260 /*
5261 * Check that the fExec flags we've got make sense.
5262 */
5263 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5264#endif
5265
5266 /*
5267 * To keep things simple we have to commit any pending writes first as we
5268 * may end up making calls.
5269 */
5270 /** @todo we could postpone this till we make the call and reload the
5271 * registers after returning from the call. Not sure if that's sensible or
5272 * not, though. */
5273#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5274 off = iemNativeRegFlushPendingWrites(pReNative, off);
5275#else
5276 /* The program counter is treated differently for now. */
5277 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5278#endif
5279
5280#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5281 /*
5282 * Move/spill/flush stuff out of call-volatile registers.
5283 * This is the easy way out. We could contain this to the tlb-miss branch
5284 * by saving and restoring active stuff here.
5285 */
5286 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5287#endif
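    /* Two strategies are visible in this function (going by the #ifdef blocks): with
     * IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP everything is vacated from the
     * call-volatile registers up front, otherwise the live variables and guest shadows are saved
     * before and restored after the TlbMiss helper call further down. */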
5288
5289 /*
5290 * Define labels and allocate the result register (trying for the return
5291 * register if we can).
5292 */
5293 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5294#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5295 uint8_t idxRegValueFetch = UINT8_MAX;
5296
5297 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5298 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5299 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5300 else
5301 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5302 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5303 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5304 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5305#else
5306 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5307 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5308 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5309 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5310#endif
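    /* The TLB state object constructed below gathers what the inlined TLB lookup needs: it acquires
     * the address and temporary registers and computes fSkip, which is set when the lookup cannot be
     * inlined so that only the TlbMiss helper-call path is emitted (an interpretation based on how
     * TlbState is used in the remainder of this function). */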
5311 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
5312
5313#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5314 uint8_t idxRegValueStore = UINT8_MAX;
5315
5316 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5317 idxRegValueStore = !TlbState.fSkip
5318 && enmOp == kIemNativeEmitMemOp_Store
5319 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5320 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5321 : UINT8_MAX;
5322 else
5323 idxRegValueStore = !TlbState.fSkip
5324 && enmOp == kIemNativeEmitMemOp_Store
5325 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5326 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5327 : UINT8_MAX;
5328
5329#else
5330 uint8_t const idxRegValueStore = !TlbState.fSkip
5331 && enmOp == kIemNativeEmitMemOp_Store
5332 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5333 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5334 : UINT8_MAX;
5335#endif
5336 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5337 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5338 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5339 : UINT32_MAX;
5340
5341 /*
5342 * Jump to the TLB lookup code.
5343 */
5344 if (!TlbState.fSkip)
5345 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5346
5347 /*
5348 * TlbMiss:
5349 *
5350 * Call the helper to do the fetching or storing.
5351 * We flush all guest register shadow copies here.
5352 */
5353 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5354
5355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5357#else
5358 RT_NOREF(idxInstr);
5359#endif
5360
5361#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5362 if (pReNative->Core.offPc)
5363 {
5364 /*
5365 * Update the program counter but restore it at the end of the TlbMiss branch.
5366 * This should allow delaying more program counter updates for the TlbLookup and hit paths
5367 * which are hopefully much more frequent, reducing the number of memory accesses.
5368 */
5369 /* Allocate a temporary PC register. */
5370 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5371
5372 /* Perform the addition and store the result. */
5373 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5374 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5375
5376 /* Free and flush the PC register. */
5377 iemNativeRegFreeTmp(pReNative, idxPcReg);
5378 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5379 }
5380#endif
5381
5382#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5383 /* Save variables in volatile registers. */
5384 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5385 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5386 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5387 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5388#endif
5389
5390 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5391 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5392#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5393 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5394 {
5395 /*
5396 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5397 *
5398 * @note A host register was assigned to the variable for the TlbLookup case above
5399 * and it must not be freed here, or the value loaded into that register will not be synced back
5400 * further down the road because the variable no longer knows it had a register assigned.
5401 *
5402 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5403 * as it will be overwritten anyway.
5404 */
5405 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5406 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5407 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5408 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5409 }
5410 else
5411#endif
5412 if (enmOp == kIemNativeEmitMemOp_Store)
5413 {
5414 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5415 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5416#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5417 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5418#else
5419 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
5420 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5421#endif
5422 }
5423
5424 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5425 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5426#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5427 fVolGregMask);
5428#else
5429 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
5430#endif
5431
5432 if (iSegReg != UINT8_MAX)
5433 {
5434 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5435 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5436 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5437 }
5438
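    /* Summary of the helper calling convention as set up here (a reading of the argument loads above,
     * not a definition taken from the helper headers): arg0 = pVCpu, arg1 = the guest address (plus
     * offDisp), arg2 = iSegReg for segmented accesses; the value to store (or its stack reference for
     * SIMD values) goes in arg2 for flat and arg3 for segmented accesses; fetches return the value in
     * IEMNATIVE_CALL_RET_GREG. */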
5439 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5440 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5441
5442 /* Done setting up parameters, make the call. */
5443 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5444
5445 /*
5446 * Put the result in the right register if this is a fetch.
5447 */
5448 if (enmOp != kIemNativeEmitMemOp_Store)
5449 {
5450#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5451 if ( cbMem == sizeof(RTUINT128U)
5452 || cbMem == sizeof(RTUINT256U))
5453 {
5454 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5455
5456 /* Sync the value on the stack with the host register assigned to the variable. */
5457 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5458 }
5459 else
5460#endif
5461 {
5462 Assert(idxRegValueFetch == pVarValue->idxReg);
5463 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5464 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5465 }
5466 }
5467
5468#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5469 /* Restore variables and guest shadow registers to volatile registers. */
5470 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5471 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5472#endif
5473
5474#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5475 if (pReNative->Core.offPc)
5476 {
5477 /*
5478 * Time to restore the program counter to its original value.
5479 */
5480 /* Allocate a temporary PC register. */
5481 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5482
5483 /* Restore the original value. */
5484 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5485 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5486
5487 /* Free and flush the PC register. */
5488 iemNativeRegFreeTmp(pReNative, idxPcReg);
5489 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5490 }
5491#endif
5492
5493#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5494 if (!TlbState.fSkip)
5495 {
5496 /* end of TlbMiss - Jump to the done label. */
5497 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5498 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5499
5500 /*
5501 * TlbLookup:
5502 */
5503 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5504 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5505 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5506
5507 /*
5508 * Emit code to do the actual storing / fetching.
5509 */
5510 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5511# ifdef VBOX_WITH_STATISTICS
5512 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5513 enmOp == kIemNativeEmitMemOp_Store
5514 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5515 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5516# endif
5517 switch (enmOp)
5518 {
5519 case kIemNativeEmitMemOp_Store:
5520 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5521 {
5522 switch (cbMem)
5523 {
5524 case 1:
5525 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5526 break;
5527 case 2:
5528 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5529 break;
5530 case 4:
5531 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5532 break;
5533 case 8:
5534 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5535 break;
5536#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5537 case sizeof(RTUINT128U):
5538 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5539 break;
5540 case sizeof(RTUINT256U):
5541 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5542 break;
5543#endif
5544 default:
5545 AssertFailed();
5546 }
5547 }
5548 else
5549 {
5550 switch (cbMem)
5551 {
5552 case 1:
5553 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5554 idxRegMemResult, TlbState.idxReg1);
5555 break;
5556 case 2:
5557 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5558 idxRegMemResult, TlbState.idxReg1);
5559 break;
5560 case 4:
5561 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5562 idxRegMemResult, TlbState.idxReg1);
5563 break;
5564 case 8:
5565 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5566 idxRegMemResult, TlbState.idxReg1);
5567 break;
5568 default:
5569 AssertFailed();
5570 }
5571 }
5572 break;
5573
5574 case kIemNativeEmitMemOp_Fetch:
5575 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5576 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5577 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5578 switch (cbMem)
5579 {
5580 case 1:
5581 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5582 break;
5583 case 2:
5584 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5585 break;
5586 case 4:
5587 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5588 break;
5589 case 8:
5590 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5591 break;
5592#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5593 case sizeof(RTUINT128U):
5594 /*
5595 * No need to sync the register back to the stack; this is done by the generic variable handling
5596 * code whenever a variable has a register assigned and its stack slot must be accessed.
5597 */
5598 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5599 break;
5600 case sizeof(RTUINT256U):
5601 /*
5602 * No need to sync the register back to the stack; this is done by the generic variable handling
5603 * code whenever a variable has a register assigned and its stack slot must be accessed.
5604 */
5605 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5606 break;
5607#endif
5608 default:
5609 AssertFailed();
5610 }
5611 break;
5612
5613 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5614 Assert(cbMem == 1);
5615 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5616 break;
5617
5618 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5619 Assert(cbMem == 1 || cbMem == 2);
5620 if (cbMem == 1)
5621 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5622 else
5623 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5624 break;
5625
5626 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5627 switch (cbMem)
5628 {
5629 case 1:
5630 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5631 break;
5632 case 2:
5633 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5634 break;
5635 case 4:
5636 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5637 break;
5638 default:
5639 AssertFailed();
5640 }
5641 break;
5642
5643 default:
5644 AssertFailed();
5645 }
5646
5647 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5648
5649 /*
5650 * TlbDone:
5651 */
5652 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5653
5654 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5655
5656# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5657 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5658 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5659# endif
5660 }
5661#else
5662 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5663#endif
5664
5665 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5666 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5667 return off;
5668}
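/* Rough shape of the code emitted by iemNativeEmitMemFetchStoreDataCommon when the TLB lookup is not
 * skipped (a sketch for orientation only, register shuffling omitted):
 *
 *         jmp     TlbLookup
 *     TlbMiss:
 *         save volatile registers, load pVCpu / GCPtrMem / (iSegReg) / (value) into the call args
 *         call    pfnFunction
 *         move the result into the variable register (fetches), restore registers
 *         jmp     TlbDone
 *     TlbLookup:
 *         inlined TLB lookup, branching back to TlbMiss on a miss
 *         inlined load/store through the host address in idxRegMemResult
 *     TlbDone:
 */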
5669
5670
5671
5672/*********************************************************************************************************************************
5673* Memory fetches (IEM_MEM_FETCH_XXX). *
5674*********************************************************************************************************************************/
5675
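/* Each of the IEM_MC_* wrappers below simply maps the corresponding microcode statement onto
 * iemNativeEmitMemFetchStoreDataCommon with the matching access size, alignment mask, operation and
 * TLB-miss helper; no additional logic lives in the macros themselves. */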
5676/* 8-bit segmented: */
5677#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5678 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5679 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5680 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5681
5682#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5683 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5684 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5685 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5686
5687#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5688 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5689 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5690 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5691
5692#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5693 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5694 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5695 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5696
5697#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5698 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5699 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5700 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5701
5702#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5703 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5704 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5705 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5706
5707#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5708 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5709 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5710 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5711
5712/* 16-bit segmented: */
5713#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5714 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5715 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5716 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5717
5718#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5719 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5720 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5721 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5722
5723#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5724 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5725 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5726 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5727
5728#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5729 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5730 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5731 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5732
5733#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5734 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5735 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5736 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5737
5738#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5739 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5740 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5741 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5742
5743
5744/* 32-bit segmented: */
5745#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5746 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5747 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5748 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5749
5750#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5751 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5752 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5753 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5754
5755#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5756 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5757 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5758 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5759
5760#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5761 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5762 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5763 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5764
5765AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5766#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5767 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5768 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5769 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5770
5771
5772/* 64-bit segmented: */
5773#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5774 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5775 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5776 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5777
5778AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5779#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5780 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5781 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5782 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5783
5784
5785/* 8-bit flat: */
5786#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5787 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5788 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5789 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5790
5791#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5792 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5793 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5794 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5795
5796#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5797 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5798 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5799 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5800
5801#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5802 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5803 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5804 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5805
5806#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5807 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5808 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5809 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5810
5811#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5812 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5813 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5814 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5815
5816#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5817 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5818 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5819 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5820
5821
5822/* 16-bit flat: */
5823#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5824 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5825 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5826 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5827
5828#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5829 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5830 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5831 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5832
5833#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5834 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5835 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5836 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5837
5838#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5839 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5840 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5841 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5842
5843#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5844 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5845 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5846 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5847
5848#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5849 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5850 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5851 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5852
5853/* 32-bit flat: */
5854#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5855 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5856 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5857 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5858
5859#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5860 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5861 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5862 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5863
5864#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5865 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5866 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5867 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5868
5869#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5870 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5871 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5872 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5873
5874#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
5875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
5876 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5877 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5878
5879
5880/* 64-bit flat: */
5881#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
5882 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5883 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5884 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5885
5886#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
5887 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
5888 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5889 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5890
5891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5892/* 128-bit segmented: */
5893#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
5894 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5895 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5896 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
5897
5898#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
5899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5900 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5901 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5902
5903AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
5904#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
5905 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
5906 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5907 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5908
5909#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
5910 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5911 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5912 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
5913
5914/* 128-bit flat: */
5915#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
5916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5917 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5918 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
5919
5920#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
5921 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5922 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5923 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5924
5925#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
5926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
5927 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5928 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5929
5930#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
5931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5932 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5933 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
5934
5935/* 256-bit segmented: */
5936#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
5937 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5938 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5939 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5940
5941#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
5942 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5943 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5944 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5945
5946#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
5947 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5948 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5949 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5950
5951
5952/* 256-bit flat: */
5953#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
5954 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5955 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5956 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5957
5958#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
5959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5960 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5961 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5962
5963#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
5964 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5965 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5966 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5967#endif
5968
5969
5970/*********************************************************************************************************************************
5971* Memory stores (IEM_MEM_STORE_XXX). *
5972*********************************************************************************************************************************/
5973
5974#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
5975 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
5976 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5977 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5978
5979#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
5980 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
5981 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5982 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5983
5984#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
5985 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
5986 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5987 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5988
5989#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
5990 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
5991 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5992 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5993
5994
5995#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
5996 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
5997 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5998 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5999
6000#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
6001 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
6002 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6003 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6004
6005#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
6006 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
6007 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6008 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6009
6010#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
6011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
6012 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6013 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6014
6015
6016#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
6017 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6018 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6019
6020#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
6021 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6022 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6023
6024#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
6025 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6026 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6027
6028#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
6029 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6030 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6031
6032
6033#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
6034 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6035 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6036
6037#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
6038 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6039 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6040
6041#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
6042 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6043 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6044
6045#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
6046 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6047 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6048
6049/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6050 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6051DECL_INLINE_THROW(uint32_t)
6052iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6053 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6054{
6055 /*
6056 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6057 * to do the grunt work.
6058 */
6059 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6061 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6062 pfnFunction, idxInstr);
6063 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6064 return off;
6065}
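/* Note: the alignment mask passed to the common worker above is cbMem - 1, the same value the
 * non-const IEM_MC_STORE_MEM_U16/32/64 wrappers use (for cbMem == 1 it evaluates to zero, matching
 * the byte store), so const and non-const stores take the same TLB-lookup path. */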
6066
6067
6068#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6069# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6071 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6072 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6073
6074# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6076 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6077 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6078
6079# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6080 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6081 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6082 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6083
6084# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6085 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6086 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6087 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6088
6089
6090# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6091 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6092 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6093 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6094
6095# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6096 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6097 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6098 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6099
6100# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6101 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6102 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6103 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6104
6105# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6106 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6107 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6108 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6109#endif
6110
6111
6112
6113/*********************************************************************************************************************************
6114* Stack Accesses. *
6115*********************************************************************************************************************************/
6116/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
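/* Example decode of the packed parameter (based on the RT_BYTE* extraction in iemNativeEmitStackPush
 * below): RT_MAKE_U32_FROM_U8(16, 64, 0, 0), as used by IEM_MC_FLAT64_PUSH_U16, means "push a 16-bit
 * value" (byte 1 = variable width in bits), "the stack pointer is flat 64-bit" (byte 2 = flat width),
 * and "not a segment-register push" (byte 3 = fSReg). */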
6117#define IEM_MC_PUSH_U16(a_u16Value) \
6118 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6119 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6120#define IEM_MC_PUSH_U32(a_u32Value) \
6121 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6122 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6123#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6124 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6125 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6126#define IEM_MC_PUSH_U64(a_u64Value) \
6127 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6128 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6129
6130#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6131 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6132 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6133#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6134 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6135 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6136#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6137 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6138 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6139
6140#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6141 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6142 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6143#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6144 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6145 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
6146
6147
6148DECL_FORCE_INLINE_THROW(uint32_t)
6149iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6150{
6151 /* Use16BitSp: */
6152#ifdef RT_ARCH_AMD64
6153 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6154 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6155#else
6156 /* sub regeff, regrsp, #cbMem */
6157 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6158 /* and regeff, regeff, #0xffff */
6159 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6160 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6161 /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 of idxRegEffSp into bits 15:0 of idxRegRsp. */
6162 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6163#endif
6164 return off;
6165}
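/* In other words (a restatement of the code above): with a 16-bit stack the push only decrements SP
 * modulo 64K, the effective address is formed from the low 16 bits, and bits 63:16 of RSP are left
 * untouched by the partial-register update / bfi. */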
6166
6167
6168DECL_FORCE_INLINE(uint32_t)
6169iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6170{
6171 /* Use32BitSp: */
6172 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6173 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6174 return off;
6175}
6176
6177
6178/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6179DECL_INLINE_THROW(uint32_t)
6180iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6181 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6182{
6183 /*
6184 * Assert sanity.
6185 */
6186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6187 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6188#ifdef VBOX_STRICT
6189 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6190 {
6191 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6192 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6193 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6194 Assert( pfnFunction
6195 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6196 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6197 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6198 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6199 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6200 : UINT64_C(0xc000b000a0009000) ));
6201 }
6202 else
6203 Assert( pfnFunction
6204 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6205 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6206 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6207 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6208 : UINT64_C(0xc000b000a0009000) ));
6209#endif
6210
6211#ifdef VBOX_STRICT
6212 /*
6213 * Check that the fExec flags we've got make sense.
6214 */
6215 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6216#endif
6217
6218 /*
6219 * To keep things simple we have to commit any pending writes first as we
6220 * may end up making calls.
6221 */
6222 /** @todo we could postpone this till we make the call and reload the
6223 * registers after returning from the call. Not sure if that's sensible or
6224 * not, though. */
6225 off = iemNativeRegFlushPendingWrites(pReNative, off);
6226
6227 /*
6228 * First we calculate the new RSP and the effective stack pointer value.
6229 * For 64-bit mode and flat 32-bit these two are the same.
6230 * (The code structure mirrors that of the corresponding stack pop emitter.)
6231 */
6232 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6233 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6234 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
6235 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6236 ? cbMem : sizeof(uint16_t);
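    /* The narrowing above models the Intel behaviour of PUSH with a segment register and a 32/64-bit
     * operand size: only 16 bits are written to the stack while the stack pointer is still adjusted
     * by the full operand size; in 16-bit mode, and on non-Intel guests, the full cbMem bytes are
     * stored (stated here as a reading of the fIsIntelSeg check, not of the CPU documentation). */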
6237 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6238 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6239 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6240 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6241 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6242 if (cBitsFlat != 0)
6243 {
6244 Assert(idxRegEffSp == idxRegRsp);
6245 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6246 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6247 if (cBitsFlat == 64)
6248 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6249 else
6250 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6251 }
6252 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6253 {
6254 Assert(idxRegEffSp != idxRegRsp);
6255 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6256 kIemNativeGstRegUse_ReadOnly);
6257#ifdef RT_ARCH_AMD64
6258 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6259#else
6260 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6261#endif
6262 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6263 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6264 offFixupJumpToUseOtherBitSp = off;
6265 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6266 {
6267 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6268 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6269 }
6270 else
6271 {
6272 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6273 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6274 }
6275 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6276 }
6277 /* SpUpdateEnd: */
6278 uint32_t const offLabelSpUpdateEnd = off;
6279
6280 /*
6281 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6282 * we're skipping lookup).
6283 */
6284 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6285 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6286 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6287 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6288 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6289 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6290 : UINT32_MAX;
6291 uint8_t const idxRegValue = !TlbState.fSkip
6292 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6293 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6294 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6295 : UINT8_MAX;
6296 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6297
6298
6299 if (!TlbState.fSkip)
6300 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6301 else
6302 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
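    /* Code layout from here on: the Use16BitSp fallback, then the TlbMiss code (ending with a
       jump to TlbDone), then the TlbLookup code falling through into the TLB-hit store, and
       finally the TlbDone label after which the new RSP is committed. */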
6303
6304 /*
6305 * Use16BitSp:
6306 */
6307 if (cBitsFlat == 0)
6308 {
6309#ifdef RT_ARCH_AMD64
6310 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6311#else
6312 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6313#endif
6314 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6315 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6316 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6317 else
6318 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6319 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6320 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6321 }
6322
6323 /*
6324 * TlbMiss:
6325 *
6326 * Call helper to do the pushing.
6327 */
6328 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6329
6330#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6331 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6332#else
6333 RT_NOREF(idxInstr);
6334#endif
6335
6336 /* Save variables in volatile registers. */
6337 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6338 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6339 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6340 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6341 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6342
6343 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6344 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6345 {
6346 /* Swap them using ARG0 as temp register: */
6347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6348 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6349 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6350 }
6351 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6352 {
6353 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6354 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6355 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6356
6357 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6358 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6359 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6360 }
6361 else
6362 {
6363 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6364 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6365
6366 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6367 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6368 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6369 }
6370
6371 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6372 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6373
6374 /* Done setting up parameters, make the call. */
6375 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6376
6377 /* Restore variables and guest shadow registers to volatile registers. */
6378 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6379 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6380
6381#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6382 if (!TlbState.fSkip)
6383 {
6384 /* end of TlbMiss - Jump to the done label. */
6385 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6386 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6387
6388 /*
6389 * TlbLookup:
6390 */
6391 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6392 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6393
6394 /*
6395 * Emit code to do the actual storing / fetching.
6396 */
6397 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6398# ifdef VBOX_WITH_STATISTICS
6399 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6400 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6401# endif
6402 if (idxRegValue != UINT8_MAX)
6403 {
6404 switch (cbMemAccess)
6405 {
6406 case 2:
6407 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6408 break;
6409 case 4:
6410 if (!fIsIntelSeg)
6411 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6412 else
6413 {
6414                            /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
6415                               PUSH FS in real mode, so we have to try to emulate that here.
6416 We borrow the now unused idxReg1 from the TLB lookup code here. */
6417 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6418 kIemNativeGstReg_EFlags);
6419 if (idxRegEfl != UINT8_MAX)
6420 {
6421#ifdef RT_ARCH_AMD64
6422 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6423 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6424 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6425#else
6426 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6427 off, TlbState.idxReg1, idxRegEfl,
6428 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6429#endif
6430 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6431 }
6432 else
6433 {
6434 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6435 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6436 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6437 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6438 }
6439 /* ASSUMES the upper half of idxRegValue is ZERO. */
6440 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
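                            /* idxReg1 now holds the selector in the low word and EFLAGS[31:16]
                               (sans the reserved/RAZ bits) in the high word; store it as one dword. */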
6441 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6442 }
6443 break;
6444 case 8:
6445 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6446 break;
6447 default:
6448 AssertFailed();
6449 }
6450 }
6451 else
6452 {
6453 switch (cbMemAccess)
6454 {
6455 case 2:
6456 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6457 idxRegMemResult, TlbState.idxReg1);
6458 break;
6459 case 4:
6460 Assert(!fIsSegReg);
6461 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6462 idxRegMemResult, TlbState.idxReg1);
6463 break;
6464 case 8:
6465 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6466 break;
6467 default:
6468 AssertFailed();
6469 }
6470 }
6471
6472 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6473 TlbState.freeRegsAndReleaseVars(pReNative);
6474
6475 /*
6476 * TlbDone:
6477 *
6478 * Commit the new RSP value.
6479 */
6480 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6481 }
6482#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6483
6484#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6485 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6486#endif
6487 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6488 if (idxRegEffSp != idxRegRsp)
6489 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6490
6491    /* The value variable is implicitly flushed. */
6492 if (idxRegValue != UINT8_MAX)
6493 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6494 iemNativeVarFreeLocal(pReNative, idxVarValue);
6495
6496 return off;
6497}
6498
6499
6500
6501/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
6502#define IEM_MC_POP_GREG_U16(a_iGReg) \
6503 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6504 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6505#define IEM_MC_POP_GREG_U32(a_iGReg) \
6506 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6507 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6508#define IEM_MC_POP_GREG_U64(a_iGReg) \
6509 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6510 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6511
6512#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6513 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6514 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6515#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6516 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6517 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6518
6519#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6520 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6521 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6522#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6523 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6524 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
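/* Note: the third byte of the RT_MAKE_U32_FROM_U8 tuple (the segment register flag used by the
   PUSH variants) is always zero for the POP variants above. */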
6525
6526
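/**
 * Emits the 16-bit stack pointer variant of the pop SP update: copies SP into
 * idxRegEffSp (zero extended) and advances SP by cbMem with 16-bit wrap-around,
 * leaving RSP bits 63:16 untouched.  E.g. SP=0xfffe with cbMem=2 gives
 * EffSp=0xfffe and a new SP of 0x0000.  idxRegTmp is only used on arm64.
 */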
6527DECL_FORCE_INLINE_THROW(uint32_t)
6528iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6529 uint8_t idxRegTmp)
6530{
6531 /* Use16BitSp: */
6532#ifdef RT_ARCH_AMD64
6533 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6534 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6535 RT_NOREF(idxRegTmp);
6536#else
6537 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6538 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6539 /* add tmp, regrsp, #cbMem */
6540 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6541 /* and tmp, tmp, #0xffff */
6542 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6543 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6544    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6545 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6546#endif
6547 return off;
6548}
6549
6550
6551DECL_FORCE_INLINE(uint32_t)
6552iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6553{
6554 /* Use32BitSp: */
6555 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6556 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6557 return off;
6558}
6559
6560
6561/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6562DECL_INLINE_THROW(uint32_t)
6563iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6564 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6565{
6566 /*
6567 * Assert sanity.
6568 */
6569 Assert(idxGReg < 16);
6570#ifdef VBOX_STRICT
6571 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6572 {
6573 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6574 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6575 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6576 Assert( pfnFunction
6577 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6578 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6579 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6580 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6581 : UINT64_C(0xc000b000a0009000) ));
6582 }
6583 else
6584 Assert( pfnFunction
6585 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6586 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6587 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6588 : UINT64_C(0xc000b000a0009000) ));
6589#endif
6590
6591#ifdef VBOX_STRICT
6592 /*
6593 * Check that the fExec flags we've got make sense.
6594 */
6595 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6596#endif
6597
6598 /*
6599 * To keep things simple we have to commit any pending writes first as we
6600 * may end up making calls.
6601 */
6602 off = iemNativeRegFlushPendingWrites(pReNative, off);
6603
6604 /*
6605 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6606 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6607 * directly as the effective stack pointer.
6608 * (Code structure is very similar to that of PUSH)
6609 */
6610 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6611 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6612 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6613 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6614 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6615 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6616 * will be the resulting register value. */
6617    uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* Holds the effective address first, then the popped value; also the arm64 SP += 2/4 helper temp. */
6618
6619 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6620 if (cBitsFlat != 0)
6621 {
6622 Assert(idxRegEffSp == idxRegRsp);
6623 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6624 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6625 }
6626 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6627 {
6628 Assert(idxRegEffSp != idxRegRsp);
6629 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6630 kIemNativeGstRegUse_ReadOnly);
6631#ifdef RT_ARCH_AMD64
6632 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6633#else
6634 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6635#endif
6636 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6637 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6638 offFixupJumpToUseOtherBitSp = off;
6639 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6640 {
6641/** @todo can skip idxRegRsp updating when popping ESP. */
6642 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6643 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6644 }
6645 else
6646 {
6647 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6648 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6649 }
6650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6651 }
6652 /* SpUpdateEnd: */
6653 uint32_t const offLabelSpUpdateEnd = off;
6654
6655 /*
6656 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6657 * we're skipping lookup).
6658 */
6659 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6660 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6661 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6662 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6663 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6664 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6665 : UINT32_MAX;
6666
6667 if (!TlbState.fSkip)
6668 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6669 else
6670 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6671
6672 /*
6673 * Use16BitSp:
6674 */
6675 if (cBitsFlat == 0)
6676 {
6677#ifdef RT_ARCH_AMD64
6678 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6679#else
6680 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6681#endif
6682 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6683 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6684 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6685 else
6686 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6687 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6688 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6689 }
6690
6691 /*
6692 * TlbMiss:
6693 *
6694     * Call helper to do the popping.
6695 */
6696 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6697
6698#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6699 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6700#else
6701 RT_NOREF(idxInstr);
6702#endif
6703
6704 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6705 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6706 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6707 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6708
6709
6710 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6711 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6712 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6713
6714 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6715 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6716
6717 /* Done setting up parameters, make the call. */
6718 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6719
6720 /* Move the return register content to idxRegMemResult. */
6721 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6722 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6723
6724 /* Restore variables and guest shadow registers to volatile registers. */
6725 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6726 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6727
6728#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6729 if (!TlbState.fSkip)
6730 {
6731 /* end of TlbMiss - Jump to the done label. */
6732 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6733 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6734
6735 /*
6736 * TlbLookup:
6737 */
6738 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6739 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6740
6741 /*
6742         * Emit code to load the value (the address is in idxRegMemResult; the result is loaded into that same register).
6743 */
6744 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6745# ifdef VBOX_WITH_STATISTICS
6746 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6747 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6748# endif
6749 switch (cbMem)
6750 {
6751 case 2:
6752 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6753 break;
6754 case 4:
6755 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6756 break;
6757 case 8:
6758 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6759 break;
6760 default:
6761 AssertFailed();
6762 }
6763
6764 TlbState.freeRegsAndReleaseVars(pReNative);
6765
6766 /*
6767 * TlbDone:
6768 *
6769 * Set the new RSP value (FLAT accesses needs to calculate it first) and
6770 * commit the popped register value.
6771 */
6772 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6773 }
6774#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6775
6776 if (idxGReg != X86_GREG_xSP)
6777 {
6778 /* Set the register. */
6779 if (cbMem >= sizeof(uint32_t))
6780 {
6781#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6782 AssertMsg( pReNative->idxCurCall == 0
6783 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6784 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6785#endif
6786 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6787#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6788 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6789#endif
6790#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6791 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6792 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6793#endif
6794 }
6795 else
6796 {
6797 Assert(cbMem == sizeof(uint16_t));
6798 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6799 kIemNativeGstRegUse_ForUpdate);
6800 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
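            /* A 16-bit pop only replaces the low word of the destination GPR; bits 63:16 are preserved. */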
6801#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6802 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6803#endif
6804 iemNativeRegFreeTmp(pReNative, idxRegDst);
6805 }
6806
6807 /* Complete RSP calculation for FLAT mode. */
6808 if (idxRegEffSp == idxRegRsp)
6809 {
6810 if (cBitsFlat == 64)
6811 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6812 else
6813 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6814 }
6815 }
6816 else
6817 {
6818        /* We're popping RSP, ESP or SP. Only the SP case needs a bit of extra work, of course. */
6819 if (cbMem == sizeof(uint64_t))
6820 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6821 else if (cbMem == sizeof(uint32_t))
6822 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6823 else
6824 {
6825 if (idxRegEffSp == idxRegRsp)
6826 {
6827 if (cBitsFlat == 64)
6828 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6829 else
6830 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6831 }
6832 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6833 }
6834 }
6835
6836#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6837 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6838#endif
6839
6840 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6841 if (idxRegEffSp != idxRegRsp)
6842 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6843 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6844
6845 return off;
6846}
6847
6848
6849
6850/*********************************************************************************************************************************
6851* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6852*********************************************************************************************************************************/
6853
6854#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6855 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6856 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6857 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6858
6859#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6860 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6861 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6862 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6863
6864#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6865 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6866 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6867 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6868
6869#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6870 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6871 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6872 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
6873
6874
6875#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6876 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6877 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6878 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
6879
6880#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6881 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6882 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6883 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
6884
6885#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6886 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6887 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6888 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6889
6890#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6891 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6892 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6893 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
6894
6895#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6896 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
6897 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6898 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6899
6900
6901#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6902 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6903 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6904 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
6905
6906#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6907 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6908 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6909 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
6910
6911#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6912 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6913 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6914 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6915
6916#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6917 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6918 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6919 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
6920
6921#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6922 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
6923 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6924 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6925
6926
6927#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6928 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6929 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6930 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
6931
6932#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6933 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6934 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6935 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
6936#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6937 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6938 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6939 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6940
6941#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6942 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6943 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6944 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
6945
6946#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6947 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
6948 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6949 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6950
6951
6952#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6953 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6954 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6955 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
6956
6957#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6958 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6959 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6960 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
6961
6962
6963#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6964 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6965 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6966 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
6967
6968#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6969 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6970 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6971 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
6972
6973#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6974 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6975 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6976 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
6977
6978#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6979 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6980 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6981 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
6982
6983
6984
6985#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6986 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6987 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6988 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
6989
6990#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6991 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6992 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6993 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
6994
6995#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6996 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6997 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6998 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
6999
7000#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7001 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7002 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7003 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
7004
7005
7006#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7007 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7008 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7009 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
7010
7011#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7012 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7013 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7014 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
7015
7016#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7017 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7018 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7019 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7020
7021#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7022 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7023 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7024 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
7025
7026#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
7027 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
7028 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7029 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7030
7031
7032#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7033 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7034 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7035 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
7036
7037#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7038 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7039 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7040 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
7041
7042#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7043 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7044 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7045 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7046
7047#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7048 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7049 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7050 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7051
7052#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7053 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7054 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7055 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7056
7057
7058#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7059 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7060 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7061 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7062
7063#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7065 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7066 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7067
7068#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7070 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7071 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7072
7073#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7074 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7075 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7076 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7077
7078#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7079 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7080 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7081 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7082
7083
7084#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7085 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7086 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7087 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7088
7089#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7090 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7091 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7092 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7093
7094
7095#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7096 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7097 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7098 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7099
7100#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7101 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7102 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7103 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7104
7105#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7106 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7107 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7108 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7109
7110#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7111 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7112 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7113 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
7114
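/* A typical MC block pairs one of the IEM_MC_MEM[_FLAT]_MAP_XXX statements above with a matching
   IEM_MC_MEM_COMMIT_AND_UNMAP_XXX (see below), roughly along these lines (the variable names in
   this sketch are purely illustrative):
        IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
        ... modify *pu16Dst ...
        IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */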
7115
7116DECL_INLINE_THROW(uint32_t)
7117iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7118 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7119 uintptr_t pfnFunction, uint8_t idxInstr)
7120{
7121 /*
7122 * Assert sanity.
7123 */
7124 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7125 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7126 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7127 && pVarMem->cbVar == sizeof(void *),
7128 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7129
7130 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7131 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7132 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7133 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7134 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7135
7136 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7138 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7139 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7140 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7141
7142 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7143
7144 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7145
7146#ifdef VBOX_STRICT
7147# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7148 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7149 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7150 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7151 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7152# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7153 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7154 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7155 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
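    /* E.g. IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemFlatMapDataU32) resolves to the address
       of iemNativeHlpMemFlatMapDataU32Rw, which is what the switches below assert pfnFunction against. */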
7156
7157 if (iSegReg == UINT8_MAX)
7158 {
7159 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7160 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7161 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7162 switch (cbMem)
7163 {
7164 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7165 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7166 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7167 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7168 case 10:
7169 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7170 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7171 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7172 break;
7173 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7174# if 0
7175 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7176 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7177# endif
7178 default: AssertFailed(); break;
7179 }
7180 }
7181 else
7182 {
7183 Assert(iSegReg < 6);
7184 switch (cbMem)
7185 {
7186 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7187 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7188 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7189 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7190 case 10:
7191 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7192 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7193 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7194 break;
7195 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7196# if 0
7197 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7198 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7199# endif
7200 default: AssertFailed(); break;
7201 }
7202 }
7203# undef IEM_MAP_HLP_FN
7204# undef IEM_MAP_HLP_FN_NO_AT
7205#endif
7206
7207#ifdef VBOX_STRICT
7208 /*
7209 * Check that the fExec flags we've got make sense.
7210 */
7211 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7212#endif
7213
7214 /*
7215 * To keep things simple we have to commit any pending writes first as we
7216 * may end up making calls.
7217 */
7218 off = iemNativeRegFlushPendingWrites(pReNative, off);
7219
7220#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7221 /*
7222 * Move/spill/flush stuff out of call-volatile registers.
7223 * This is the easy way out. We could contain this to the tlb-miss branch
7224 * by saving and restoring active stuff here.
7225 */
7226 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7227 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7228#endif
7229
7230 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7231 while the tlb-miss codepath will temporarily put it on the stack.
7232       Set the type to stack here so we don't need to do it twice below. */
7233 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7234 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7235 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7236 * lookup is done. */
7237
7238 /*
7239 * Define labels and allocate the result register (trying for the return
7240 * register if we can).
7241 */
7242 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7243 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7244 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7245 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
7246 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7247 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7248 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7249 : UINT32_MAX;
7250//off=iemNativeEmitBrk(pReNative, off, 0);
7251 /*
7252 * Jump to the TLB lookup code.
7253 */
7254 if (!TlbState.fSkip)
7255 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7256
7257 /*
7258 * TlbMiss:
7259 *
7260 * Call helper to do the fetching.
7261 * We flush all guest register shadow copies here.
7262 */
7263 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7264
7265#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7266 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7267#else
7268 RT_NOREF(idxInstr);
7269#endif
7270
7271#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7272 /* Save variables in volatile registers. */
7273 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7274 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7275#endif
7276
7277 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7278    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
7279#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7280 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7281#else
7282 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7283#endif
7284
7285 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7286 if (iSegReg != UINT8_MAX)
7287 {
7288 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7289 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7290 }
7291
7292 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
7293 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7294 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7295
7296 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7297 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7298
7299 /* Done setting up parameters, make the call. */
7300 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7301
7302 /*
7303 * Put the output in the right registers.
7304 */
7305 Assert(idxRegMemResult == pVarMem->idxReg);
7306 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7307 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7308
7309#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7310 /* Restore variables and guest shadow registers to volatile registers. */
7311 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7312 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7313#endif
7314
7315 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7316 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7317
7318#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7319 if (!TlbState.fSkip)
7320 {
7321        /* end of TlbMiss - Jump to the done label. */
7322 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7323 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7324
7325 /*
7326 * TlbLookup:
7327 */
7328 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7329 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7330# ifdef VBOX_WITH_STATISTICS
7331 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7332 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7333# endif
7334
7335 /* [idxVarUnmapInfo] = 0; */
7336 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7337
7338 /*
7339 * TlbDone:
7340 */
7341 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7342
7343 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7344
7345# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7346 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7347 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7348# endif
7349 }
7350#else
7351 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7352#endif
7353
7354 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7355 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7356
7357 return off;
7358}
7359
7360
7361#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7362 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7363 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7364
7365#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7366 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7367 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7368
7369#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7370 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7371 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7372
7373#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7374 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7375 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7376
7377DECL_INLINE_THROW(uint32_t)
7378iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7379 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7380{
7381 /*
7382 * Assert sanity.
7383 */
7384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7385#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7386 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7387#endif
7388 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7389 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7390 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7391#ifdef VBOX_STRICT
7392 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7393 {
7394 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7395 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7396 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7397 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7398 case IEM_ACCESS_TYPE_WRITE:
7399 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7400 case IEM_ACCESS_TYPE_READ:
7401 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7402 default: AssertFailed();
7403 }
7404#else
7405 RT_NOREF(fAccess);
7406#endif
7407
7408 /*
7409 * To keep things simple we have to commit any pending writes first as we
7410 * may end up making calls (there shouldn't be any at this point, so this
7411 * is just for consistency).
7412 */
7413 /** @todo we could postpone this till we make the call and reload the
7414 * registers after returning from the call. Not sure if that's sensible or
7415 * not, though. */
7416 off = iemNativeRegFlushPendingWrites(pReNative, off);
7417
7418 /*
7419 * Move/spill/flush stuff out of call-volatile registers.
7420 *
7421 * We exclude any register holding the bUnmapInfo variable, as we'll be
7422 * checking it after returning from the call and will free it afterwards.
7423 */
7424 /** @todo save+restore active registers and maybe guest shadows in miss
7425 * scenario. */
7426 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7427 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7428
7429 /*
7430 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7431 * to call the unmap helper function.
7432 *
7433 * The likelihood of it being zero is higher than that of a TLB hit when doing
7434 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
7435 * access should also end up with a mapping that won't need special unmapping.
7436 */
7437 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7438 * should speed up things for the pure interpreter as well when TLBs
7439 * are enabled. */
7440#ifdef RT_ARCH_AMD64
7441 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7442 {
7443 /* test byte [rbp - xxx], 0ffh */
7444 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7445 pbCodeBuf[off++] = 0xf6;
7446 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7447 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7448 pbCodeBuf[off++] = 0xff;
7449 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7450 }
7451 else
7452#endif
7453 {
7454 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7455 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7456 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7457 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7458 }
7459 uint32_t const offJmpFixup = off;
7460 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
7461
7462 /*
7463 * Call the unmap helper function.
7464 */
7465#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7466 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7467#else
7468 RT_NOREF(idxInstr);
7469#endif
7470
7471 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7472 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7473 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7474
7475 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7476 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7477
7478 /* Done setting up parameters, make the call. */
7479 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7480
7481 /* The bUnmapInfo variable is implicitly freed by these MCs. */
7482 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7483
7484 /*
7485 * Done, just fixup the jump for the non-call case.
7486 */
7487 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7488
7489 return off;
7490}
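/* Rough shape of the code emitted by iemNativeEmitMemCommitAndUnmap above (AMD64 flavour,
   assuming bUnmapInfo was spilled to its stack slot; a sketch, not verbatim output):
        test    byte [rbp - <slot>], 0ffh   ; bUnmapInfo == 0 => nothing to unmap
        jz      .done
        mov     <arg1>, bUnmapInfo          ; loaded from the stack slot
        mov     <arg0>, pVCpu
        call    iemNativeHlpMemCommitAndUnmap<Atomic|Rw|Wo|Ro>
   .done:
*/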
7491
7492
7493
7494/*********************************************************************************************************************************
7495* State and Exceptions *
7496*********************************************************************************************************************************/
7497
7498#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7499#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7500
7501#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7502#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7503#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7504
7505#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7506#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7507#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7508
7509
7510DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7511{
7512 /** @todo this needs a lot more work later. */
7513 RT_NOREF(pReNative, fForChange);
7514 return off;
7515}
7516
7517
7518
7519/*********************************************************************************************************************************
7520* Emitters for FPU related operations. *
7521*********************************************************************************************************************************/
7522
7523#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7524 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7525
7526/** Emits code for IEM_MC_FETCH_FCW. */
7527DECL_INLINE_THROW(uint32_t)
7528iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7529{
7530 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7531 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7532
7533 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7534
7535 /* Allocate a temporary FCW register. */
7536 /** @todo eliminate extra register */
7537 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7538 kIemNativeGstRegUse_ReadOnly);
7539
7540 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7541
7542 /* Free but don't flush the FCW register. */
7543 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7544 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7545
7546 return off;
7547}
7548
7549
7550#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7551 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7552
7553/** Emits code for IEM_MC_FETCH_FSW. */
7554DECL_INLINE_THROW(uint32_t)
7555iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7556{
7557 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7558 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7559
7560 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7561 /* Allocate a temporary FSW register. */
7562 /** @todo eliminate extra register */
7563 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7564 kIemNativeGstRegUse_ReadOnly);
7565
7566 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7567
7568 /* Free but don't flush the FSW register. */
7569 iemNativeRegFreeTmp(pReNative, idxFswReg);
7570 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7571
7572 return off;
7573}
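/* Note: The FCW and FSW fetch emitters above share the same pattern: acquire a host register
   for the destination variable, allocate a read-only shadow of the guest FCW/FSW register,
   copy the low 16 bits across, and free the shadow without flushing it. */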
7574
7575
7576
7577#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7578
7579
7580/*********************************************************************************************************************************
7581* Emitters for SSE/AVX specific operations. *
7582*********************************************************************************************************************************/
7583
7584#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7585 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7586
7587/** Emits code for IEM_MC_COPY_XREG_U128. */
7588DECL_INLINE_THROW(uint32_t)
7589iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7590{
7591 /* This is a nop if the source and destination registers are the same. */
7592 if (iXRegDst != iXRegSrc)
7593 {
7594 /* Allocate destination and source register. */
7595 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7596 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7597 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7598 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7599
7600 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7601
7602 /* Free but don't flush the source and destination register. */
7603 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7604 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7605 }
7606
7607 return off;
7608}
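/* Note: The destination is allocated with kIemNativeGstRegUse_ForFullWrite, so its previous
   contents are not loaded from CPUMCTX, while the source is allocated read-only and is loaded
   from CPUMCTX if not already shadowed. This is why the iXRegDst == iXRegSrc case is
   short-circuited above (see also the YMM copy emitters further down). */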
7609
7610
7611#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7612 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7613
7614/** Emits code for IEM_MC_FETCH_XREG_U128. */
7615DECL_INLINE_THROW(uint32_t)
7616iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7617{
7618 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7619 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7620
7621 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7622 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7623
7624 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7625
7626 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7627
7628 /* Free but don't flush the source register. */
7629 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7630 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7631
7632 return off;
7633}
7634
7635
7636#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7637 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7638
7639/** Emits code for IEM_MC_FETCH_XREG_U64. */
7640DECL_INLINE_THROW(uint32_t)
7641iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7642{
7643 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7644 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7645
7646 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7647 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7648
7649 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7650 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7651
7652 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7653
7654 /* Free but don't flush the source register. */
7655 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7656 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7657
7658 return off;
7659}
7660
7661
7662#define IEM_MC_FETCH_XREG_U32(a_u64Value, a_iXReg, a_iDWord) \
7663 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u64Value, a_iXReg, a_iDWord)
7664
7665/** Emits code for IEM_MC_FETCH_XREG_U32. */
7666DECL_INLINE_THROW(uint32_t)
7667iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7668{
7669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7670 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7671
7672 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7673 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7674
7675 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7676 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7677
7678 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7679
7680 /* Free but don't flush the source register. */
7681 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7682 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7683
7684 return off;
7685}
7686
7687
7688#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
7689 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
7690
7691/** Emits code for IEM_MC_FETCH_XREG_U16. */
7692DECL_INLINE_THROW(uint32_t)
7693iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7694{
7695 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7696 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7697
7698 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7699 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7700
7701 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7702 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7703
7704 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7705
7706 /* Free but don't flush the source register. */
7707 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7708 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7709
7710 return off;
7711}
7712
7713
7714#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
7715 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
7716
7717/** Emits code for IEM_MC_FETCH_XREG_U8. */
7718DECL_INLINE_THROW(uint32_t)
7719iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7720{
7721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7723
7724 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7725 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7726
7727 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7728 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7729
7730 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7731
7732 /* Free but don't flush the source register. */
7733 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7734 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7735
7736 return off;
7737}
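/* Note: The IEM_MC_FETCH_XREG_U64/U32/U16/U8 emitters above all follow the same pattern and
   differ only in the element width passed to the corresponding
   iemNativeEmitSimdLoadGprFromVecRegUxx emitter and the valid element index range
   (2/4/8/16 elements per 128-bit register, respectively). */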
7738
7739
7740#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7741 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7742
7743AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7744#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7745 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7746
7747
7748/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7749DECL_INLINE_THROW(uint32_t)
7750iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7751{
7752 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7753 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7754
7755 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7756 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7757 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7758
7759 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7760
7761 /* Free but don't flush the source register. */
7762 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7763 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7764
7765 return off;
7766}
7767
7768
7769#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7770 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
7771
7772#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7773 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
7774
7775#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
7776 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
7777
7778#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
7779 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
7780
7781#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
7782 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
7783
7784#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
7785 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
7786
7787 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
7788DECL_INLINE_THROW(uint32_t)
7789iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
7790 uint8_t cbLocal, uint8_t iElem)
7791{
7792 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7793 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
7794
7795#ifdef VBOX_STRICT
7796 switch (cbLocal)
7797 {
7798 case sizeof(uint64_t): Assert(iElem < 2); break;
7799 case sizeof(uint32_t): Assert(iElem < 4); break;
7800 case sizeof(uint16_t): Assert(iElem < 8); break;
7801 case sizeof(uint8_t): Assert(iElem < 16); break;
7802 default: AssertFailed();
7803 }
7804#endif
7805
7806 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7807 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7808 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7809
7810 switch (cbLocal)
7811 {
7812 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7813 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7814 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7815 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7816 default: AssertFailed();
7817 }
7818
7819 /* Free but don't flush the source register. */
7820 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7821 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7822
7823 return off;
7824}
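/* Note: Unlike the full 128-bit store, the partial element stores above allocate the guest
   SIMD register with kIemNativeGstRegUse_ForUpdate so the untouched elements keep their
   current values; only the addressed element is overwritten. */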
7825
7826
7827#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7828 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7829
7830/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7831DECL_INLINE_THROW(uint32_t)
7832iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7833{
7834 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7835 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7836
7837 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7838 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7839 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7840
7841 /* Zero the vector register first, then store the 64-bit value into the lower 64 bits. */
7842 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7843 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7844
7845 /* Free but don't flush the source register. */
7846 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7847 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7848
7849 return off;
7850}
7851
7852
7853#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7854 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7855
7856/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7857DECL_INLINE_THROW(uint32_t)
7858iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7859{
7860 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7861 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7862
7863 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7864 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7866
7867 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
7868 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7869 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7870
7871 /* Free but don't flush the source register. */
7872 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7873 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7874
7875 return off;
7876}
7877
7878
7879#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
7880 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
7881
7882/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
7883DECL_INLINE_THROW(uint32_t)
7884iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
7885 uint8_t idxSrcVar, uint8_t iDwSrc)
7886{
7887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7888 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7889
7890 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7891 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7892 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7893
7894 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
7895 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
7896
7897 /* Free but don't flush the destination register. */
7898 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7899 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7900
7901 return off;
7902}
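/* Note: IEM_MC_STORE_XREG_U32_U128 copies a single dword from the 128-bit source variable
   into the destination XMM register, going via IEMNATIVE_REG_FIXED_TMP0 as a scratch GPR. */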
7903
7904
7905#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7906 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7907
7908/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
7909DECL_INLINE_THROW(uint32_t)
7910iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7911{
7912 /*
7913 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
7914 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards, it would get duplicated from the already
7915 * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
7916 */
7917 if (iYRegDst != iYRegSrc)
7918 {
7919 /* Allocate destination and source register. */
7920 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7921 kIemNativeGstSimdRegLdStSz_256,
7922 kIemNativeGstRegUse_ForFullWrite);
7923 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7924 kIemNativeGstSimdRegLdStSz_Low128,
7925 kIemNativeGstRegUse_ReadOnly);
7926
7927 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7928 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7929
7930 /* Free but don't flush the source and destination register. */
7931 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7932 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7933 }
7934 else
7935 {
7936 /* This effectively only clears the upper 128 bits of the register. */
7937 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7938 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
7939
7940 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
7941
7942 /* Free but don't flush the destination register. */
7943 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
7944 }
7945
7946 return off;
7947}
7948
7949
7950#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7951 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7952
7953/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
7954DECL_INLINE_THROW(uint32_t)
7955iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7956{
7957 /*
7958 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
7959 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards, it would get duplicated from the already
7960 * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
7961 * Since iYRegSrc == iYRegDst would effectively only clear the upper 256 bits of a ZMM register, which we don't support yet, this is just a nop.
7962 */
7963 if (iYRegDst != iYRegSrc)
7964 {
7965 /* Allocate destination and source register. */
7966 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7967 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
7968 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7969 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7970
7971 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7972
7973 /* Free but don't flush the source and destination register. */
7974 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7975 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7976 }
7977
7978 return off;
7979}
7980
7981
7982#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
7983 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
7984
7985/** Emits code for IEM_MC_FETCH_YREG_U128. */
7986DECL_INLINE_THROW(uint32_t)
7987iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
7988{
7989 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7990 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7991
7992 Assert(iDQWord <= 1);
7993 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7994 iDQWord == 1
7995 ? kIemNativeGstSimdRegLdStSz_High128
7996 : kIemNativeGstSimdRegLdStSz_Low128,
7997 kIemNativeGstRegUse_ReadOnly);
7998
7999 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8000 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8001
8002 if (iDQWord == 1)
8003 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8004 else
8005 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8006
8007 /* Free but don't flush the source register. */
8008 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8009 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8010
8011 return off;
8012}
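/* Note: For IEM_MC_FETCH_YREG_U128 the a_iDQWord parameter selects which 128-bit half of the
   YMM register is fetched; only that half is loaded/shadowed (High128 vs Low128 above). */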
8013
8014
8015#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
8016 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
8017
8018/** Emits code for IEM_MC_FETCH_YREG_U64. */
8019DECL_INLINE_THROW(uint32_t)
8020iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
8021{
8022 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8023 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8024
8025 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8026 iQWord >= 2
8027 ? kIemNativeGstSimdRegLdStSz_High128
8028 : kIemNativeGstSimdRegLdStSz_Low128,
8029 kIemNativeGstRegUse_ReadOnly);
8030
8031 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8032 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8033
8034 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8035
8036 /* Free but don't flush the source register. */
8037 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8038 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8039
8040 return off;
8041}
8042
8043
8044#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
8045 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
8046
8047/** Emits code for IEM_MC_FETCH_YREG_U32. */
8048DECL_INLINE_THROW(uint32_t)
8049iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
8050{
8051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8052 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8053
8054 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8055 iDWord >= 4
8056 ? kIemNativeGstSimdRegLdStSz_High128
8057 : kIemNativeGstSimdRegLdStSz_Low128,
8058 kIemNativeGstRegUse_ReadOnly);
8059
8060 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8061 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8062
8063 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8064
8065 /* Free but don't flush the source register. */
8066 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8067 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8068
8069 return off;
8070}
8071
8072
8073#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8074 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8075
8076/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8077DECL_INLINE_THROW(uint32_t)
8078iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8079{
8080 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8081 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8082
8083 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8084
8085 /* Free but don't flush the register. */
8086 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8087
8088 return off;
8089}
8090
8091
8092#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8093 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8094
8095/** Emits code for IEM_MC_STORE_YREG_U128. */
8096DECL_INLINE_THROW(uint32_t)
8097iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8098{
8099 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8100 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8101
8102 Assert(iDQword <= 1);
8103 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8104 iDQword == 0
8105 ? kIemNativeGstSimdRegLdStSz_Low128
8106 : kIemNativeGstSimdRegLdStSz_High128,
8107 kIemNativeGstRegUse_ForFullWrite);
8108
8109 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8110
8111 if (iDQword == 0)
8112 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8113 else
8114 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8115
8116 /* Free but don't flush the source register. */
8117 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8118 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8119
8120 return off;
8121}
8122
8123
8124#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8125 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8126
8127/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8128DECL_INLINE_THROW(uint32_t)
8129iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8130{
8131 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8132 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8133
8134 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8135 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8136
8137 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8138
8139 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8140 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8141
8142 /* Free but don't flush the source register. */
8143 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8144 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8145
8146 return off;
8147}
8148
8149
8150#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8151 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8152
8153/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8154DECL_INLINE_THROW(uint32_t)
8155iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8156{
8157 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8158 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8159
8160 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8161 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8162
8163 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8164
8165 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8166 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8167
8168 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8169 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8170
8171 return off;
8172}
8173
8174
8175#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8176 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8177
8178/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8179DECL_INLINE_THROW(uint32_t)
8180iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8181{
8182 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8183 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8184
8185 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8186 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8187
8188 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8189
8190 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8191 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8192
8193 /* Free but don't flush the source register. */
8194 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8195 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8196
8197 return off;
8198}
8199
8200
8201#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8202 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8203
8204/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8205DECL_INLINE_THROW(uint32_t)
8206iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8207{
8208 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8209 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8210
8211 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8212 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8213
8214 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8215
8216 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8217 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8218
8219 /* Free but don't flush the source register. */
8220 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8221 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8222
8223 return off;
8224}
8225
8226
8227#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8228 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8229
8230/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8231DECL_INLINE_THROW(uint32_t)
8232iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8233{
8234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8235 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8236
8237 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8238 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8239
8240 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8241
8242 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8243 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8244
8245 /* Free but don't flush the source register. */
8246 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8247 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8248
8249 return off;
8250}
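/* Note: The IEM_MC_BROADCAST_XREG_*_ZX_VLMAX emitters above broadcast the value into the low
   128 bits (f256Bit=false) and then explicitly zero the upper half, whereas the YREG variants
   below pass f256Bit=true and broadcast across the full 256 bits in one go. */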
8251
8252
8253#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8254 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8255
8256/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8257DECL_INLINE_THROW(uint32_t)
8258iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8259{
8260 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8261 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8262
8263 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8264 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8265
8266 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8267
8268 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8269
8270 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8271 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8272
8273 return off;
8274}
8275
8276
8277#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8278 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8279
8280/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8281DECL_INLINE_THROW(uint32_t)
8282iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8283{
8284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8285 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8286
8287 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8288 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8289
8290 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8291
8292 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8293
8294 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8295 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8296
8297 return off;
8298}
8299
8300
8301#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8302 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8303
8304/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8305DECL_INLINE_THROW(uint32_t)
8306iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8307{
8308 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8309 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8310
8311 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8312 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8313
8314 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8315
8316 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8317
8318 /* Free but don't flush the source register. */
8319 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8320 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8321
8322 return off;
8323}
8324
8325
8326#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8327 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8328
8329/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8330DECL_INLINE_THROW(uint32_t)
8331iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8332{
8333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8334 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8335
8336 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8337 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8338
8339 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8340
8341 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8342
8343 /* Free but don't flush the source register. */
8344 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8345 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8346
8347 return off;
8348}
8349
8350
8351#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8352 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8353
8354/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8355DECL_INLINE_THROW(uint32_t)
8356iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8357{
8358 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8359 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8360
8361 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8362 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8363
8364 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8365
8366 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8367
8368 /* Free but don't flush the source register. */
8369 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8370 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8371
8372 return off;
8373}
8374
8375
8376#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8377 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8378
8379/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8380DECL_INLINE_THROW(uint32_t)
8381iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8382{
8383 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8384 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8385
8386 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8387 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8388
8389 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8390
8391 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8392 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8393
8394 /* Free but don't flush the source register. */
8395 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8396 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8397
8398 return off;
8399}
8400
8401
8402#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8403 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8404
8405/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8406DECL_INLINE_THROW(uint32_t)
8407iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8408{
8409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8410 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8411
8412 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8413 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8414
8415 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8416
8417 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8418 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8419
8420 /* Free but don't flush the source register. */
8421 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8422 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8423
8424 return off;
8425}
8426
8427
8428#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8429 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8430
8431/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8432DECL_INLINE_THROW(uint32_t)
8433iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8434{
8435 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8436 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8437
8438 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8439 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8440 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8441 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8442 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8443
8444 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8445 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8446 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8447
8448 /* Free but don't flush the source and destination registers. */
8449 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8450 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8451 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8452
8453 return off;
8454}
8455
8456
8457#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8458 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8459
8460/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8461DECL_INLINE_THROW(uint32_t)
8462iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8463{
8464 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8465 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8466
8467 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8468 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8469 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8470 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8471 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8472
8473 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8474 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8475 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8476
8477 /* Free but don't flush the source and destination registers. */
8478 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8479 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8480 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8481
8482 return off;
8483}
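/* Note: Both merge emitters above build the destination by copying the low 128 bits of
   a_iYRegSrcHx, overwriting one qword with the 64-bit local (qword 0 for U64LOCAL_U64HI,
   qword 1 for U64LO_U64LOCAL) and zeroing the upper 128 bits, matching the VLMAX
   zero-extension semantics. */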
8484
8485
8486#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
8487 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
8488
8489
8490/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
8491DECL_INLINE_THROW(uint32_t)
8492iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
8493{
8494 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8495 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8496
8497 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
8498 if (bImm8Mask & RT_BIT(0))
8499 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
8500 if (bImm8Mask & RT_BIT(1))
8501 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
8502 if (bImm8Mask & RT_BIT(2))
8503 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
8504 if (bImm8Mask & RT_BIT(3))
8505 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
8506
8507 /* Free but don't flush the destination register. */
8508 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8509
8510 return off;
8511}
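/* Note: IEM_MC_CLEAR_XREG_U32_MASK zeroes each 32-bit element whose bit is set in the
   immediate mask, one element at a time; see the r=aeichner todo above about folding
   certain bit combinations into fewer instructions. */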
8512
8513
8514#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
8515 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
8516
8517
8518/** Emits code for IEM_MC_FETCH_YREG_U256. */
8519DECL_INLINE_THROW(uint32_t)
8520iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
8521{
8522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8523 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
8524
8525 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8526 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8527 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8528
8529 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
8530
8531 /* Free but don't flush the source register. */
8532 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8533 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8534
8535 return off;
8536}
8537
8538
8539#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
8540 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
8541
8542
8543/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
8544DECL_INLINE_THROW(uint32_t)
8545iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
8546{
8547 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8548 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8549
8550 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8551 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8552 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8553
8554 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
8555
8556 /* Free but don't flush the source register. */
8557 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8558 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8559
8560 return off;
8561}
8562
8563
8564#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
8565 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
8566
8567
8568/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
8569DECL_INLINE_THROW(uint32_t)
8570iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
8571 uint8_t idxSrcVar, uint8_t iDwSrc)
8572{
8573 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8574 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8575
8576 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8577 iDwDst < 4
8578 ? kIemNativeGstSimdRegLdStSz_Low128
8579 : kIemNativeGstSimdRegLdStSz_High128,
8580 kIemNativeGstRegUse_ForUpdate);
8581 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8582 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8583
8584 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
8585 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
8586
8587 /* Free but don't flush the source register. */
8588 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8589 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8590 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8591
8592 return off;
8593}
8594
8595
8596#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
8597 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
8598
8599
8600/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
8601DECL_INLINE_THROW(uint32_t)
8602iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
8603 uint8_t idxSrcVar, uint8_t iQwSrc)
8604{
8605 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8606 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8607
8608 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8609 iQwDst < 2
8610 ? kIemNativeGstSimdRegLdStSz_Low128
8611 : kIemNativeGstSimdRegLdStSz_High128,
8612 kIemNativeGstRegUse_ForUpdate);
8613 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8614 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8615
8616 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
8617 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
8618
8619 /* Free but don't flush the source register. */
8620 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8621 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8622 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8623
8624 return off;
8625}
8626
8627
8628#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
8629 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
8630
8631
8632/** Emits code for IEM_MC_STORE_YREG_U64. */
8633DECL_INLINE_THROW(uint32_t)
8634iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
8635{
8636 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8637 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8638
8639 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8640 iQwDst < 2
8641 ? kIemNativeGstSimdRegLdStSz_Low128
8642 : kIemNativeGstSimdRegLdStSz_High128,
8643 kIemNativeGstRegUse_ForUpdate);
8644
8645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8646
8647 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
8648
8649 /* Free but don't flush the destination register, and release the source variable. */
8650 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8651 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8652
8653 return off;
8654}
8655
8656
8657#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
8658 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
8659
8660/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
8661DECL_INLINE_THROW(uint32_t)
8662iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8663{
8664 RT_NOREF(pReNative, iYReg);
8665 /** @todo Needs to be implemented when support for AVX-512 is added. */
8666 return off;
8667}
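
/*
 * Sketch of the intended effect once AVX-512 register state is tracked
 * (illustrative pseudo-C only; pZmm is a placeholder for the guest ZMM
 * register backing iYReg):
 *
 *     memset(&pZmm->au8[32], 0, 64 - 32);   // clear bits 256 and up of the 64-byte ZMM
 *
 * With no AVX-512 support in the recompiler yet, there is no upper ZMM state
 * to clear, so emitting nothing here is correct for now.
 */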
8668
8669
8670#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg) \
8671 off = iemNativeEmitSimdSseStoreResult(pReNative, off, a_SseData, a_iXmmReg)
8672
8673/** Emits code for IEM_MC_STORE_SSE_RESULT. */
8674DECL_INLINE_THROW(uint32_t)
8675iemNativeEmitSimdSseStoreResult(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSseRes, uint8_t iXReg)
8676{
8677 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSseRes);
8678 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSseRes, sizeof(X86XMMREG));
8679
8680 /* Allocating ForUpdate is important, as we might end up not writing the result value to the register if an unmasked exception occurred. */
8681 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8682 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8683 uint8_t const idxVarRegRes = iemNativeVarSimdRegisterAcquire(pReNative, idxSseRes, &off, true /*fInitialized*/);
8684 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
8685 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8686
8687 /* Update the value if there is no unmasked exception. */
8688 /* tmp = mxcsr */
8689 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8690 /* tmp &= X86_MXCSR_XCPT_MASK */
8691 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
8692 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
8693 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
8694 /* tmp = ~tmp */
8695 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
8696 /* tmp &= mxcsr */
8697 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8698
8699 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
8700 uint32_t offFixup = off;
8701 off = iemNativeEmitJnzToFixed(pReNative, off, off);
8702 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarRegRes);
8703 iemNativeFixupFixedJump(pReNative, offFixup, off);
8704
8705 /* Free but don't flush the shadowed registers; release the result variable and the temporary. */
8706 iemNativeVarRegisterRelease(pReNative, idxSseRes);
8707 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8708 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8709 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8710
8711 return off;
8712}
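
/*
 * The sequence emitted above corresponds roughly to the following C logic
 * (illustrative only; uMxCsr, xmmDst and xmmRes are placeholder names):
 *
 *     uint32_t const fUnmasked = ~((uMxCsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
 *                              & uMxCsr;
 *     if (!(fUnmasked & X86_MXCSR_XCPT_FLAGS))
 *         xmmDst = xmmRes;   // commit the result only when no unmasked exception is pending
 *
 * I.e. the exception mask bits are shifted down onto the flag positions and
 * inverted, so only pending-and-unmasked exception flags survive the AND.
 */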
8713
8714
8715/*********************************************************************************************************************************
8716* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
8717*********************************************************************************************************************************/
8718
8719/**
8720 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX.
8721 */
8722DECL_INLINE_THROW(uint32_t)
8723iemNativeEmitCallSseAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8724{
8725 /* Grab the MXCSR register; it must not be call-volatile, or it would end up being freed when setting up the call below. */
8726 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
8727 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8728 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8729
8730 /*
8731 * Need to do the FPU preparation.
8732 */
8733 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8734
8735 /*
8736 * Do all the call setup and cleanup.
8737 */
8738 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
8739
8740 /*
8741 * Load the MXCSR register into the first argument and mask out the current exception flags.
8742 */
8743 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
8744 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
8745
8746 /*
8747 * Make the call.
8748 */
8749 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8750
8751 /*
8752 * The updated MXCSR is in the return register.
8753 */
8754 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
8755
8756#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8757 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8758 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8759#endif
8760 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8761
8762 return off;
8763}
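
/*
 * The worker above implies an assembly helper shape along these lines, with
 * MXCSR (exception flags cleared) as the first argument and the updated MXCSR
 * as the return value.  This is only a hedged sketch, the real prototypes are
 * declared elsewhere, and iemAImpl_someSseOp_u128 is a made-up name:
 *
 *     uint32_t iemAImpl_someSseOp_u128(uint32_t fMxCsrIn, PX86XMMREG puDst, PCX86XMMREG puSrc);
 */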
8764
8765
8766#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
8767 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8768
8769/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
8770DECL_INLINE_THROW(uint32_t)
8771iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8772{
8773 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8774 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8775 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 2);
8776}
8777
8778
8779#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8780 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8781
8782/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
8783DECL_INLINE_THROW(uint32_t)
8784iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8785{
8786 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8787 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8788 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8789 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 3);
8790}
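
/*
 * In an MC block these macros are used roughly as sketched below; the helper
 * and variable names are invented for illustration, and the arguments must
 * already have been declared as MC block argument variables:
 *
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_someSseOp_u128, puDst, puSrc);
 *
 * The hidden MXCSR argument is supplied by iemNativeEmitCallSseAImplCommon.
 */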
8791
8792
8793/*********************************************************************************************************************************
8794* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
8795*********************************************************************************************************************************/
8796
8797/**
8798 * Common worker for IEM_MC_CALL_AVX_AIMPL_XXX.
8799 */
8800DECL_INLINE_THROW(uint32_t)
8801iemNativeEmitCallAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8802{
8803 /*
8804 * Need to do the FPU preparation.
8805 */
8806 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8807
8808 /*
8809 * Do all the call setup and cleanup.
8810 */
8811 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_AVX_AIMPL_HIDDEN_ARGS, IEM_AVX_AIMPL_HIDDEN_ARGS);
8812
8813 /*
8814 * Load the XState pointer.
8815 */
8816 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_XState, 0 /*idxRegInClass*/);
8817
8818 /*
8819 * Make the call.
8820 */
8821 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8822
8823 return off;
8824}
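
/*
 * Unlike the SSE worker, this one passes a pointer to the guest XState as the
 * hidden first argument instead of MXCSR and does not copy back a return
 * value, so the implied helper shape is roughly (hedged sketch, made-up name):
 *
 *     void iemAImpl_someAvxOp_u256(PX86XSAVEAREA pXState, PRTUINT256U puDst, PCRTUINT256U puSrc);
 *
 * Any MXCSR updating is left to the helper via that XState pointer.
 */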
8825
8826
8827#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
8828 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8829
8830/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
8831DECL_INLINE_THROW(uint32_t)
8832iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8833{
8834 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8835 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8836 return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8837}
8838
8839
8840#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8841 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8842
8843/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
8844DECL_INLINE_THROW(uint32_t)
8845iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8846{
8847 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8848 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8849 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8850 return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8851}
8852#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8853
8854
8855/*********************************************************************************************************************************
8856* Include instruction emitters. *
8857*********************************************************************************************************************************/
8858#include "target-x86/IEMAllN8veEmit-x86.h"
8859