VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 105172

Last change on this file since 105172 was 105035, checked in by vboxsync, 8 months ago

VMM/IEM,bs3-cpu-weird-1: Made bs3CpuWeird1_PcWrapping run w/o asserting in the recompiler. bugref:10715

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 486.1 KB
1/* $Id: IEMAllN8veRecompFuncs.h 105035 2024-06-26 19:48:07Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down the configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
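/*
 * Illustrative sketch (not part of this file): the flushing above keys off a per-guest-register
 * dirty-shadow bitmap.  The miniature model below uses invented names (GSTREGSTATE,
 * gstRegFlushOne) purely to show the bookkeeping pattern; the real state lives in
 * pReNative->Core (bmGstRegShadowDirty et al.) and the "context" is CPUMCTX.
 */
#if 0
# include <stdint.h>

typedef struct GSTREGSTATE
{
    uint64_t bmDirty;       /* bit N set => the shadow of guest register N needs writing back */
    uint64_t auShadow[64];  /* host-side shadow copies (host registers in the real code) */
    uint64_t auCtx[64];     /* the committed guest context (CPUMCTX stand-in) */
} GSTREGSTATE;

/* Write back a single guest register if, and only if, its shadow is dirty. */
static void gstRegFlushOne(GSTREGSTATE *pState, unsigned idxReg)
{
    if (pState->bmDirty & ((uint64_t)1 << idxReg))
    {
        pState->auCtx[idxReg] = pState->auShadow[idxReg];
        pState->bmDirty      &= ~((uint64_t)1 << idxReg);
    }
}
#endif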
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
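/*
 * Illustrative sketch (not part of this file): the IEM_MC_NATIVE_IF condition above is a
 * compile-time constant bitmask test (RT_ARCH_VAL against the supported-host mask), so the
 * compiler discards whichever branch doesn't apply to the host.  The names below
 * (SKETCH_ARCH_VAL*, emitNativePath, emitFallbackPath, example) are invented stand-ins.
 */
#if 0
# include <stdint.h>

# define SKETCH_ARCH_VAL_AMD64  UINT32_C(0x1)
# define SKETCH_ARCH_VAL_ARM64  UINT32_C(0x2)
# if defined(__x86_64__) || defined(_M_X64)
#  define SKETCH_ARCH_VAL       SKETCH_ARCH_VAL_AMD64
# else
#  define SKETCH_ARCH_VAL       SKETCH_ARCH_VAL_ARM64
# endif

static uint32_t emitNativePath(uint32_t off)   { return off + 1; } /* stand-in for a native emitter */
static uint32_t emitFallbackPath(uint32_t off) { return off + 4; } /* stand-in for the generic path */

static uint32_t example(uint32_t off)
{
    /* Same shape as IEM_MC_NATIVE_IF / _ELSE / _ENDIF: constant condition, dead branch folded away. */
    if (SKETCH_ARCH_VAL & SKETCH_ARCH_VAL_AMD64)
        off = emitNativePath(off);
    else
        off = emitFallbackPath(off);
    return off;
}
#endif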
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it probably will drop into the
310 * debugger, so it's not worth the effort of trying to service it here and we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * the immediate in the AND operation), we always update the flags and skip
315 * the extra check and its associated conditional jump.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 kIemNativeExitReason_ReturnWithFlags);
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
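/*
 * Illustrative sketch (not part of this file): the runtime effect of the _WITH_FLAGS finishing
 * code emitted above, modelled on a plain uint32_t.  Only X86_EFL_TF/RF use their real bit
 * positions; the CPUMCTX_* values are stand-ins for internal bits, and finishInstrCheckFlags
 * is an invented name.  Returning true corresponds to taking the ReturnWithFlags TB exit.
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

# define X86_EFL_TF_BIT             UINT32_C(0x00000100)    /* trap flag, bit 8 */
# define X86_EFL_RF_BIT             UINT32_C(0x00010000)    /* resume flag, bit 16 */
# define CPUMCTX_INHIBIT_SHADOW_SIM UINT32_C(0x40000000)    /* stand-in for the real internal bit */
# define CPUMCTX_DBG_MASK_SIM       UINT32_C(0x80000000)    /* stand-in for DBG_HIT_DRX/DBGF bits */

static bool finishInstrCheckFlags(uint32_t *pfEFlags)
{
    /* Any of these set => leave EFLAGS untouched and exit the TB with the special status. */
    if (*pfEFlags & (X86_EFL_TF_BIT | CPUMCTX_DBG_MASK_SIM))
        return true;
    /* Otherwise clear RF and the interrupt-inhibit shadow unconditionally and carry on. */
    *pfEFlags &= ~(uint32_t)(X86_EFL_RF_BIT | CPUMCTX_INHIBIT_SHADOW_SIM);
    return false;
}
#endif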
345
346
347/** Helper for iemNativeEmitFinishInstructionWithStatus. */
348DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
349{
350 unsigned const offOpcodes = pCallEntry->offOpcode;
351 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
352 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
353 {
354 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
355 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
356 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
357 }
358 AssertFailedReturn(NIL_RTGCPHYS);
359}
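/*
 * Illustrative sketch (not part of this file): the range walk above on a simplified TB model.
 * OPCRANGE and tbOpcodeOffToPhys are invented names; the real code resolves the page address
 * through iemTbGetRangePhysPageAddr instead of storing it directly in the range.
 */
#if 0
# include <stdint.h>

typedef struct OPCRANGE
{
    uint16_t offOpcodes;    /* start of this range within the TB's opcode buffer */
    uint16_t cbOpcodes;     /* number of opcode bytes covered by this range */
    uint16_t offPhysPage;   /* offset of the range start within its guest page */
    uint64_t GCPhysPage;    /* guest-physical address of that page */
} OPCRANGE;

/* Translate an opcode-buffer offset back to the guest-physical address it was fetched from. */
static uint64_t tbOpcodeOffToPhys(OPCRANGE const *paRanges, unsigned cRanges, unsigned offOpcode)
{
    for (unsigned i = 0; i < cRanges; i++)
    {
        unsigned const offInRange = offOpcode - paRanges[i].offOpcodes; /* wraps huge if below the range */
        if (offInRange < paRanges[i].cbOpcodes)
            return paRanges[i].GCPhysPage + paRanges[i].offPhysPage + offInRange;
    }
    return UINT64_MAX; /* not found; the real code asserts (NIL_RTGCPHYS) */
}
#endif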
360
361
362/** The VINF_SUCCESS dummy. */
363template<int const a_rcNormal, bool const a_fIsJump>
364DECL_FORCE_INLINE_THROW(uint32_t)
365iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
366 int32_t const offJump)
367{
368 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
369 if (a_rcNormal != VINF_SUCCESS)
370 {
371#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
372 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
373#else
374 RT_NOREF_PV(pCallEntry);
375#endif
376
377 /* As this code returns from the TB any pending register writes must be flushed. */
378 off = iemNativeRegFlushPendingWrites(pReNative, off);
379
380 /*
381 * Use the lookup table for getting to the next TB quickly.
382 * Note! In this code path there can only be one entry at present.
383 */
384 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
385 PCIEMTB const pTbOrg = pReNative->pTbOrg;
386 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
387 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
388
389#if 0
390 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
391 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
392 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
393 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
394 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
395
396 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreak);
397
398#else
399 /* Load the index as argument #1 for the helper call at the given label. */
400 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
401
402 /*
403 * Figure out the physical address of the current instruction and see
404 * whether the next instruction we're about to execute is in the same
405 * page, so we can optimistically skip TLB loading.
406 *
407 * - This is safe for all cases in FLAT mode.
408 * - In segmented modes it is complicated, given that a negative
409 * jump may underflow EIP and a forward jump may overflow or run into
410 * CS.LIM, triggering a #GP. The only thing we can get away with
411 * now at compile time is forward jumps w/o CS.LIM checks, since the
412 * lack of CS.LIM checks means we're good for the entire physical page
413 * we're executing on and another 15 bytes before we run into CS.LIM.
414 */
415 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
416# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
417 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
418# endif
419 )
420 {
421 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
422 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
423 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
424 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
425
426 {
427 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
428 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
429
430 /* Load the key lookup flags into the 2nd argument for the helper call.
431 - This is safe wrt CS limit checking since we're only here for FLAT modes.
432 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
433 interrupt shadow.
434 - The NMI inhibiting is more questionable, though... */
435 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
436 * Should we copy it into fExec to simplify this? OTOH, it's just a
437 * couple of extra instructions if EFLAGS are already in a register. */
438 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
439 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
440
441 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
442 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookup);
443 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithIrq);
444 }
445 }
446 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
447 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithTlb);
448 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithTlbAndIrq);
449#endif
450 }
451 return off;
452}
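/*
 * Illustrative sketch (not part of this file): the page check above deciding whether the
 * TLB-less ReturnBreakViaLookup exits can be used.  The 4K page constants stand in for
 * GUEST_PAGE_SHIFT/SIZE/OFFSET_MASK and canSkipTlbLoad is an invented name.
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

# define SKETCH_PAGE_SHIFT        12
# define SKETCH_PAGE_SIZE         UINT64_C(0x1000)
# define SKETCH_PAGE_OFFSET_MASK  UINT64_C(0x0fff)

static bool canSkipTlbLoad(uint64_t GCPhysPcCurrent, uint8_t cbInstr, int64_t offJump)
{
    uint64_t const GCPhysPcNext = GCPhysPcCurrent + cbInstr + (uint64_t)offJump;
    return (GCPhysPcNext >> SKETCH_PAGE_SHIFT) == (GCPhysPcCurrent >> SKETCH_PAGE_SHIFT) /* next PC on the same page */
        && SKETCH_PAGE_SIZE - (GCPhysPcCurrent & SKETCH_PAGE_OFFSET_MASK) >= cbInstr;    /* current instr doesn't cross */
}
#endif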
453
454
455#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
456 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
457 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
458
459#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
460 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
461 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
462 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
463
464/** Same as iemRegAddToRip64AndFinishingNoFlags. */
465DECL_INLINE_THROW(uint32_t)
466iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
467{
468#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
469# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
470 if (!pReNative->Core.offPc)
471 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
472# endif
473
474 /* Allocate a temporary PC register. */
475 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
476
477 /* Perform the addition and store the result. */
478 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
479 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
480
481 /* Free but don't flush the PC register. */
482 iemNativeRegFreeTmp(pReNative, idxPcReg);
483#endif
484
485#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
486 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
487
488 pReNative->Core.offPc += cbInstr;
489# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
490 off = iemNativePcAdjustCheck(pReNative, off);
491# endif
492 if (pReNative->cCondDepth)
493 off = iemNativeEmitPcWriteback(pReNative, off);
494 else
495 pReNative->Core.cInstrPcUpdateSkipped++;
496#endif
497
498 return off;
499}
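/*
 * Illustrative sketch (not part of this file): the IEMNATIVE_WITH_DELAYED_PC_UPDATING idea
 * used above - instead of emitting a RIP store for every instruction, the advance is
 * accumulated at compile time (Core.offPc) and a single add is emitted when something
 * forces a writeback.  PCSTATE, pcAdvance and pcWriteback are invented names.
 */
#if 0
# include <stdint.h>

typedef struct PCSTATE
{
    uint64_t uGstRip;       /* the committed guest RIP (CPUMCTX stand-in) */
    uint32_t offPcPending;  /* bytes advanced but not yet written back */
} PCSTATE;

static void pcAdvance(PCSTATE *pState, uint8_t cbInstr)
{
    pState->offPcPending += cbInstr;                /* no store emitted here */
}

static void pcWriteback(PCSTATE *pState)
{
    pState->uGstRip      += pState->offPcPending;   /* one add instead of N stores */
    pState->offPcPending  = 0;
}
#endif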
500
501
502#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
503 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
504 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
505
506#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
507 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
508 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
509 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
510
511/** Same as iemRegAddToEip32AndFinishingNoFlags. */
512DECL_INLINE_THROW(uint32_t)
513iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
514{
515#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
516# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
517 if (!pReNative->Core.offPc)
518 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
519# endif
520
521 /* Allocate a temporary PC register. */
522 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
523
524 /* Perform the addition and store the result. */
525 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
526 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
527
528 /* Free but don't flush the PC register. */
529 iemNativeRegFreeTmp(pReNative, idxPcReg);
530#endif
531
532#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
533 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
534
535 pReNative->Core.offPc += cbInstr;
536# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
537 off = iemNativePcAdjustCheck(pReNative, off);
538# endif
539 if (pReNative->cCondDepth)
540 off = iemNativeEmitPcWriteback(pReNative, off);
541 else
542 pReNative->Core.cInstrPcUpdateSkipped++;
543#endif
544
545 return off;
546}
547
548
549#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
550 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
551 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
552
553#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
554 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
557
558/** Same as iemRegAddToIp16AndFinishingNoFlags. */
559DECL_INLINE_THROW(uint32_t)
560iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
561{
562#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
563# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
564 if (!pReNative->Core.offPc)
565 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
566# endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition and store the result. */
572 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
573 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
574 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
575
576 /* Free but don't flush the PC register. */
577 iemNativeRegFreeTmp(pReNative, idxPcReg);
578#endif
579
580#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
581 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
582
583 pReNative->Core.offPc += cbInstr;
584# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
585 off = iemNativePcAdjustCheck(pReNative, off);
586# endif
587 if (pReNative->cCondDepth)
588 off = iemNativeEmitPcWriteback(pReNative, off);
589 else
590 pReNative->Core.cInstrPcUpdateSkipped++;
591#endif
592
593 return off;
594}
595
596
597
598/*********************************************************************************************************************************
599* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
600*********************************************************************************************************************************/
601
602#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
603 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
604 (a_enmEffOpSize), pCallEntry->idxInstr); \
605 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
606
607#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
608 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
609 (a_enmEffOpSize), pCallEntry->idxInstr); \
610 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
611 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
612
613#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
614 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
615 IEMMODE_16BIT, pCallEntry->idxInstr); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
617
618#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
620 IEMMODE_16BIT, pCallEntry->idxInstr); \
621 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
622 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
623
624#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
625 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
626 IEMMODE_64BIT, pCallEntry->idxInstr); \
627 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
628
629#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
630 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
631 IEMMODE_64BIT, pCallEntry->idxInstr); \
632 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
633 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
634
635/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
636 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
637 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
638DECL_INLINE_THROW(uint32_t)
639iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
640 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
641{
642 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
643
644 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
645 off = iemNativeRegFlushPendingWrites(pReNative, off);
646
647#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
648 Assert(pReNative->Core.offPc == 0);
649
650 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
651#endif
652
653 /* Allocate a temporary PC register. */
654 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
655
656 /* Perform the addition. */
657 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
658
659 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
660 {
661 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
662 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
663 }
664 else
665 {
666 /* Just truncate the result to 16-bit IP. */
667 Assert(enmEffOpSize == IEMMODE_16BIT);
668 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
669 }
670 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
671
672 /* Free but don't flush the PC register. */
673 iemNativeRegFreeTmp(pReNative, idxPcReg);
674
675 return off;
676}
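/*
 * Illustrative sketch (not part of this file): the canonical-address test that the 64-bit
 * path above emits via iemNativeEmitCheckGprCanonicalMaybeRaiseGp0.  A 64-bit address is
 * canonical when bits 63:47 are a sign extension of bit 47; one common formulation is shown
 * (isCanonical is an invented name).
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

static bool isCanonical(uint64_t uAddr)
{
    /* Shift the two canonical halves so they land contiguously in [0, 2^48). */
    return uAddr + UINT64_C(0x0000800000000000) < UINT64_C(0x0001000000000000);
}
#endif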
677
678
679#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
680 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
681 (a_enmEffOpSize), pCallEntry->idxInstr); \
682 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
683
684#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
685 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
686 (a_enmEffOpSize), pCallEntry->idxInstr); \
687 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
689
690#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
691 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
692 IEMMODE_16BIT, pCallEntry->idxInstr); \
693 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
694
695#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
696 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
697 IEMMODE_16BIT, pCallEntry->idxInstr); \
698 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
699 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
700
701#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
702 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
703 IEMMODE_32BIT, pCallEntry->idxInstr); \
704 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
705
706#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
707 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
708 IEMMODE_32BIT, pCallEntry->idxInstr); \
709 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
710 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
711
712/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
713 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
714 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
715DECL_INLINE_THROW(uint32_t)
716iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
717 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
718{
719 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
720
721 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
722 off = iemNativeRegFlushPendingWrites(pReNative, off);
723
724#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
725 Assert(pReNative->Core.offPc == 0);
726
727 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
728#endif
729
730 /* Allocate a temporary PC register. */
731 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
732
733 /* Perform the addition. */
734 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
735
736 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
737 if (enmEffOpSize == IEMMODE_16BIT)
738 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
739
740 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
741/** @todo we can skip this in 32-bit FLAT mode. */
742 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
743
744 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
745
746 /* Free but don't flush the PC register. */
747 iemNativeRegFreeTmp(pReNative, idxPcReg);
748
749 return off;
750}
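/*
 * Illustrative sketch (not part of this file): the runtime effect of the 16/32-bit path
 * above - 32-bit add, optional truncation to IP, then the CS limit check performed by the
 * emitted iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0.  eip32RelativeJump is an
 * invented name and the #GP(0) is modelled as returning false.
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

static bool eip32RelativeJump(uint32_t *puEip, uint32_t uCsLimit, uint8_t cbInstr,
                              int32_t offDisp, bool f16BitOpSize)
{
    uint32_t uNewEip = *puEip + (uint32_t)offDisp + cbInstr;
    if (f16BitOpSize)
        uNewEip &= UINT32_C(0xffff);    /* truncate to 16-bit IP */
    if (uNewEip > uCsLimit)
        return false;                   /* would raise #GP(0) and exit the TB */
    *puEip = uNewEip;
    return true;
}
#endif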
751
752
753#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
754 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
755 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
756
757#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
758 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
759 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
760 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
761
762#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
763 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
764 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
765
766#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
767 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
768 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
769 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
770
771#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
772 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
773 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
774
775#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
776 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
777 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
778 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
779
780/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
781DECL_INLINE_THROW(uint32_t)
782iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
783 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
784{
785 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
786 off = iemNativeRegFlushPendingWrites(pReNative, off);
787
788#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
789 Assert(pReNative->Core.offPc == 0);
790
791 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
792#endif
793
794 /* Allocate a temporary PC register. */
795 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
796
797 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
798 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
799 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
800 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
801 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
802
803 /* Free but don't flush the PC register. */
804 iemNativeRegFreeTmp(pReNative, idxPcReg);
805
806 return off;
807}
808
809
810
811/*********************************************************************************************************************************
812* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
813*********************************************************************************************************************************/
814
815/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
816#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
817 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
818
819/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
820#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
821 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
822
823/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
824#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
825 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
826
827/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
828 * clears flags. */
829#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
830 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
831 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
832
833/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
834 * clears flags. */
835#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
836 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
837 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
838
839/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
840 * clears flags. */
841#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
842 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
843 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
844
845#undef IEM_MC_SET_RIP_U16_AND_FINISH
846
847
848/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
849#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
850 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
851
852/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
853#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
854 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
855
856/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
857 * clears flags. */
858#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
859 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
860 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
861
862/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
863 * and clears flags. */
864#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
865 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
866 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
867
868#undef IEM_MC_SET_RIP_U32_AND_FINISH
869
870
871/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
872#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
873 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
874
875/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
876 * and clears flags. */
877#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
878 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
879 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
880
881#undef IEM_MC_SET_RIP_U64_AND_FINISH
882
883
884/** Same as iemRegRipJumpU16AndFinishNoFlags,
885 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
886DECL_INLINE_THROW(uint32_t)
887iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
888 uint8_t idxInstr, uint8_t cbVar)
889{
890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
892
893 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
894 off = iemNativeRegFlushPendingWrites(pReNative, off);
895
896#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
897 Assert(pReNative->Core.offPc == 0);
898
899 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
900#endif
901
902 /* Get a register with the new PC loaded from idxVarPc.
903 Note! This ASSUMES that the high bits of the GPR are zeroed. */
904 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
905
906 /* Check limit (may #GP(0) + exit TB). */
907 if (!f64Bit)
908/** @todo we can skip this test in FLAT 32-bit mode. */
909 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
910 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
911 else if (cbVar > sizeof(uint32_t))
912 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
913
914 /* Store the result. */
915 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
916
917 iemNativeVarRegisterRelease(pReNative, idxVarPc);
918 /** @todo implicitly free the variable? */
919
920 return off;
921}
922
923
924
925/*********************************************************************************************************************************
926* Emitters for changing PC/RIP/EIP/IP with a relative call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
927*********************************************************************************************************************************/
928
929/** @todo These helpers belong to the stack push API naturally, but we already need them up here (we could of course move
930 * this below the stack emitters, but then it wouldn't be close to the rest of the PC/RIP handling...). */
931DECL_FORCE_INLINE_THROW(uint32_t)
932iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
933{
934 /* Use16BitSp: */
935#ifdef RT_ARCH_AMD64
936 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
937 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
938#else
939 /* sub regeff, regrsp, #cbMem */
940 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
941 /* and regeff, regeff, #0xffff */
942 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
943 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
944 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
945 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
946#endif
947 return off;
948}
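/*
 * Illustrative sketch (not part of this file): what the Use16BitSp emitter above computes -
 * the push only moves SP (bits 15:0 of RSP), the upper RSP bits are preserved (the 16-bit
 * sub / bfi), and the zero-extended 16-bit value is what the memory access uses.
 * stackPushAdjust16BitSp is an invented name.
 */
#if 0
# include <stdint.h>

/* Returns the effective (zero-extended) SP to store at; updates *puRsp in place. */
static uint64_t stackPushAdjust16BitSp(uint64_t *puRsp, uint8_t cbMem)
{
    uint16_t const uNewSp = (uint16_t)(*puRsp & 0xffff) - cbMem;    /* 16-bit wraparound */
    *puRsp = (*puRsp & ~UINT64_C(0xffff)) | uNewSp;                 /* keep bits 63:16 intact */
    return uNewSp;                                                  /* effective address base */
}
#endif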
949
950
951DECL_FORCE_INLINE(uint32_t)
952iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
953{
954 /* Use32BitSp: */
955 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
956 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
957 return off;
958}
959
960
961DECL_INLINE_THROW(uint32_t)
962iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
963 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
964{
965 /*
966 * Assert sanity.
967 */
968#ifdef VBOX_STRICT
969 if (RT_BYTE2(cBitsVarAndFlat) != 0)
970 {
971 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
972 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
973 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
974 Assert( pfnFunction
975 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
976 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
977 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
978 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
979 : UINT64_C(0xc000b000a0009000) ));
980 }
981 else
982 Assert( pfnFunction
983 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
984 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
985 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
986 : UINT64_C(0xc000b000a0009000) ));
987#endif
988
989#ifdef VBOX_STRICT
990 /*
991 * Check that the fExec flags we've got make sense.
992 */
993 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
994#endif
995
996 /*
997 * To keep things simple we have to commit any pending writes first as we
998 * may end up making calls.
999 */
1000 /** @todo we could postpone this till we make the call and reload the
1001 * registers after returning from the call. Not sure if that's sensible or
1002 * not, though. */
1003 off = iemNativeRegFlushPendingWrites(pReNative, off);
1004
1005 /*
1006 * First we calculate the new RSP and the effective stack pointer value.
1007 * For 64-bit mode and flat 32-bit these two are the same.
1008 * (Code structure is very similar to that of PUSH)
1009 */
1010 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1011 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1012 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1013 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1014 ? cbMem : sizeof(uint16_t);
1015 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1016 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1017 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1018 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1019 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1020 if (cBitsFlat != 0)
1021 {
1022 Assert(idxRegEffSp == idxRegRsp);
1023 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1024 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1025 if (cBitsFlat == 64)
1026 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1027 else
1028 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1029 }
1030 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1031 {
1032 Assert(idxRegEffSp != idxRegRsp);
1033 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1034 kIemNativeGstRegUse_ReadOnly);
1035#ifdef RT_ARCH_AMD64
1036 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1037#else
1038 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1039#endif
1040 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1041 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1042 offFixupJumpToUseOtherBitSp = off;
1043 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1044 {
1045 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1046 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1047 }
1048 else
1049 {
1050 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1051 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1052 }
1053 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1054 }
1055 /* SpUpdateEnd: */
1056 uint32_t const offLabelSpUpdateEnd = off;
1057
1058 /*
1059 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1060 * we're skipping lookup).
1061 */
1062 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1063 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1064 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1065 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1066 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1067 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1068 : UINT32_MAX;
1069 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1070
1071
1072 if (!TlbState.fSkip)
1073 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1074 else
1075 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1076
1077 /*
1078 * Use16BitSp:
1079 */
1080 if (cBitsFlat == 0)
1081 {
1082#ifdef RT_ARCH_AMD64
1083 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1084#else
1085 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1086#endif
1087 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1088 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1089 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1090 else
1091 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1092 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1094 }
1095
1096 /*
1097 * TlbMiss:
1098 *
1099 * Call helper to do the pushing.
1100 */
1101 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1102
1103#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1104 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1105#else
1106 RT_NOREF(idxInstr);
1107#endif
1108
1109 /* Save variables in volatile registers. */
1110 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1111 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1112 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1113 | (RT_BIT_32(idxRegPc));
1114 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1115
1116 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1117 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1118 {
1119 /* Swap them using ARG0 as temp register: */
1120 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1121 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1122 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1123 }
1124 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1125 {
1126 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1127 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1128
1129 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1130 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1131 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1132 }
1133 else
1134 {
1135 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1136 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1137
1138 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1140 }
1141
1142 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1143 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1144
1145 /* Done setting up parameters, make the call. */
1146 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1147
1148 /* Restore variables and guest shadow registers to volatile registers. */
1149 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1150 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1151
1152#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1153 if (!TlbState.fSkip)
1154 {
1155 /* end of TlbMiss - Jump to the done label. */
1156 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1157 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1158
1159 /*
1160 * TlbLookup:
1161 */
1162 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1163 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1164
1165 /*
1166 * Emit code to do the actual storing / fetching.
1167 */
1168 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1169# ifdef IEM_WITH_TLB_STATISTICS
1170 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1171 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1172# endif
1173 switch (cbMemAccess)
1174 {
1175 case 2:
1176 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1177 break;
1178 case 4:
1179 if (!fIsIntelSeg)
1180 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1181 else
1182 {
1183 /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
1184 PUSH FS in real mode, so we have to try to emulate that here.
1185 We borrow the now unused idxReg1 from the TLB lookup code here. */
1186 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1187 kIemNativeGstReg_EFlags);
1188 if (idxRegEfl != UINT8_MAX)
1189 {
1190#ifdef RT_ARCH_AMD64
1191 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1192 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1193 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1194#else
1195 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1196 off, TlbState.idxReg1, idxRegEfl,
1197 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1198#endif
1199 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1200 }
1201 else
1202 {
1203 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1204 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1205 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1206 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1207 }
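 /* At this point TlbState.idxReg1 holds only the defined bits of the upper EFLAGS
    half (the RAZ bits were masked off above); OR-ing in the zero-extended 16-bit
    PC below yields the doubleword such CPUs appear to store for this quirk. */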
1208 /* ASSUMES the upper half of idxRegPc is ZERO. */
1209 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1210 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1211 }
1212 break;
1213 case 8:
1214 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1215 break;
1216 default:
1217 AssertFailed();
1218 }
1219
1220 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1221 TlbState.freeRegsAndReleaseVars(pReNative);
1222
1223 /*
1224 * TlbDone:
1225 *
1226 * Commit the new RSP value.
1227 */
1228 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1229 }
1230#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1231
1232#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1233 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1234#endif
1235 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1236 if (idxRegEffSp != idxRegRsp)
1237 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1238
1239 return off;
1240}
1241
1242
1243/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1244#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1245 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1246
1247/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1248 * clears flags. */
1249#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1250 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1251 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1252
1253/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1254#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1255 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1256
1257/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1258 * clears flags. */
1259#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1260 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1261 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1262
1263#undef IEM_MC_IND_CALL_U16_AND_FINISH
1264
1265
1266/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1267#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1268 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1269
1270/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1271 * clears flags. */
1272#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1273 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1274 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1275
1276#undef IEM_MC_IND_CALL_U32_AND_FINISH
1277
1278
1279/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1280 * an extra parameter, for use in 64-bit code. */
1281#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1282 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1283
1284
1285/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1286 * an extra parameter, for use in 64-bit code and we need to check and clear
1287 * flags. */
1288#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1289 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1290 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1291
1292#undef IEM_MC_IND_CALL_U64_AND_FINISH
1293
1294/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1295 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1296DECL_INLINE_THROW(uint32_t)
1297iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1298 uint8_t idxInstr, uint8_t cbVar)
1299{
1300 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1301 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1302
1303 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1304 off = iemNativeRegFlushPendingWrites(pReNative, off);
1305
1306#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1307 Assert(pReNative->Core.offPc == 0);
1308
1309 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1310#endif
1311
1312 /* Get a register with the new PC loaded from idxVarPc.
1313 Note! This ASSUMES that the high bits of the GPR is zeroed. */
1314 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1315
1316 /* Check limit (may #GP(0) + exit TB). */
1317 if (!f64Bit)
1318/** @todo we can skip this test in FLAT 32-bit mode. */
1319 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1320 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1321 else if (cbVar > sizeof(uint32_t))
1322 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1323
1324#if 1
1325 /* Allocate a temporary PC register, we don't want it shadowed. */
1326 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1327 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1328#else
1329 /* Allocate a temporary PC register. */
1330 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1331 true /*fNoVolatileRegs*/);
1332#endif
1333
1334 /* Perform the addition and push the variable to the guest stack. */
1335 /** @todo Flat variants for PC32 variants. */
1336 switch (cbVar)
1337 {
1338 case sizeof(uint16_t):
1339 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1340 /* Truncate the result to 16-bit IP. */
1341 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1342 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1343 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1344 break;
1345 case sizeof(uint32_t):
1346 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1347 /** @todo In FLAT mode we can use the flat variant. */
1348 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1349 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1350 break;
1351 case sizeof(uint64_t):
1352 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1353 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1354 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1355 break;
1356 default:
1357 AssertFailed();
1358 }
1359
1360 /* RSP got changed, so do this again. */
1361 off = iemNativeRegFlushPendingWrites(pReNative, off);
1362
1363 /* Store the result. */
1364 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1365
1366#if 1
1367 /* Need to transfer the shadow information to the new RIP register. */
1368 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1369#else
1370 /* Sync the new PC. */
1371 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1372#endif
1373 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1374 iemNativeRegFreeTmp(pReNative, idxPcReg);
1375 /** @todo implicitly free the variable? */
1376
1377 return off;
1378}
1379
1380
1381/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1382 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1383#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1384 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1385
1386/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1387 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1388 * flags. */
1389#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1390 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1391 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1392
1393/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1394 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1395#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1396 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1397
1398/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1399 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1400 * flags. */
1401#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1402 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1403 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1404
1405/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1406 * an extra parameter, for use in 64-bit code. */
1407#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1408 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1409
1410/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1411 * an extra parameter, for use in 64-bit code and we need to check and clear
1412 * flags. */
1413#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1414 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1415 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1416
1417#undef IEM_MC_REL_CALL_S16_AND_FINISH
1418
1419/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1420 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1421DECL_INLINE_THROW(uint32_t)
1422iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1423 uint8_t idxInstr)
1424{
1425 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1426 off = iemNativeRegFlushPendingWrites(pReNative, off);
1427
1428#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1429 Assert(pReNative->Core.offPc == 0);
1430
1431 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1432#endif
1433
1434 /* Allocate a temporary PC register. */
1435 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1436 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1437 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1438
1439 /* Calculate the new RIP. */
1440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1441 /* Truncate the result to 16-bit IP. */
1442 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1443 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1444 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1445
1446 /* Truncate the result to 16-bit IP. */
1447 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
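 /* Illustration (16-bit code): with IP=0xfff0, cbInstr=3 and offDisp=+0x20 the
    value pushed below is 0xfff3 (the return address) and the new IP becomes
    (0xfff3 + 0x20) & 0xffff = 0x0013, i.e. 16-bit wrap-around is preserved. */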
1448
1449 /* Check limit (may #GP(0) + exit TB). */
1450 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1451
1452 /* Push the return address to the guest stack. */
1453 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1454 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1455
1456 /* RSP got changed, so flush again. */
1457 off = iemNativeRegFlushPendingWrites(pReNative, off);
1458
1459 /* Store the result. */
1460 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1461
1462 /* Need to transfer the shadow information to the new RIP register. */
1463 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1464 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1465 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1466
1467 return off;
1468}
1469
1470
1471/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1472 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1473#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1474 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1475
1476/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1477 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1478 * flags. */
1479#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1480 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1481 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1482
1483#undef IEM_MC_REL_CALL_S32_AND_FINISH
1484
1485/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1486 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1487DECL_INLINE_THROW(uint32_t)
1488iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1489 uint8_t idxInstr)
1490{
1491 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1492 off = iemNativeRegFlushPendingWrites(pReNative, off);
1493
1494#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1495 Assert(pReNative->Core.offPc == 0);
1496
1497 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1498#endif
1499
1500 /* Allocate a temporary PC register. */
1501 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1502 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1503 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1504
1505 /* Update the EIP to get the return address. */
1506 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1507
1508 /* Load the address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it is outside it. */
1509 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1510 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1511 /** @todo we can skip this test in FLAT 32-bit mode. */
1512 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1513
1514 /* Push the return address to the guest stack. */
1515 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1516 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1517 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1518
1519 /* RSP got changed, so do this again. */
1520 off = iemNativeRegFlushPendingWrites(pReNative, off);
1521
1522 /* Store the result. */
1523 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1524
1525 /* Need to transfer the shadow information to the new RIP register. */
1526 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1527 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1528 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1529
1530 return off;
1531}
1532
1533
1534/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1535 * an extra parameter, for use in 64-bit code. */
1536#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1537 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1538
1539/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1540 * an extra parameter, for use in 64-bit code and we need to check and clear
1541 * flags. */
1542#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1543 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1544 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1545
1546#undef IEM_MC_REL_CALL_S64_AND_FINISH
1547
1548/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1549 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1550DECL_INLINE_THROW(uint32_t)
1551iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1552 uint8_t idxInstr)
1553{
1554 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1555 off = iemNativeRegFlushPendingWrites(pReNative, off);
1556
1557#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1558 Assert(pReNative->Core.offPc == 0);
1559
1560 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1561#endif
1562
1563 /* Allocate a temporary PC register. */
1564 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1565 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1566 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1567
1568 /* Update the RIP to get the return address. */
1569 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1570
1571 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1572 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1573 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1574 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1575
1576 /* Push the return address to the guest stack. */
1577 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1578 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1579
1580 /* RSP got changed, so do this again. */
1581 off = iemNativeRegFlushPendingWrites(pReNative, off);
1582
1583 /* Store the result. */
1584 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1585
1586 /* Need to transfer the shadow information to the new RIP register. */
1587 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1588 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1589 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1590
1591 return off;
1592}
1593
1594
1595/*********************************************************************************************************************************
1596* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1597*********************************************************************************************************************************/
1598
1599DECL_FORCE_INLINE_THROW(uint32_t)
1600iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1601 uint16_t cbPopAdd, uint8_t idxRegTmp)
1602{
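 /* Illustration: for a 'retn 8' with a 16-bit stack (cbMem=2, cbPopAdd=8) and
    RSP=0x1234fffe, the effective SP used for the fetch is 0xfffe and SP becomes
    (0xfffe + 2 + 8) & 0xffff = 0x0008, leaving the upper RSP bits untouched. */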
1603 /* Use16BitSp: */
1604#ifdef RT_ARCH_AMD64
1605 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1606 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1607 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1608 RT_NOREF(idxRegTmp);
1609#elif defined(RT_ARCH_ARM64)
1610 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1611 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1612 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1613 uint16_t const cbCombined = cbMem + cbPopAdd;
1614 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1615 if (cbCombined >= RT_BIT_32(12))
1616 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1617 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1618 /* and tmp, tmp, #0xffff */
1619 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1620 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1621 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1622 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1623#else
1624# error "Port me"
1625#endif
1626 return off;
1627}
1628
1629
1630DECL_FORCE_INLINE_THROW(uint32_t)
1631iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1632 uint16_t cbPopAdd)
1633{
1634 /* Use32BitSp: */
1635 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1636 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1637 return off;
1638}
1639
1640
1641/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1642#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1643 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1644
1645/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1646#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1647 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1648
1649/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1650#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1651 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1652
1653/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1654 * clears flags. */
1655#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1656 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1657 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1658
1659/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1660 * clears flags. */
1661#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1662 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1663 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1664
1665/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1666 * clears flags. */
1667#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1668 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1669 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1670
1671/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1672DECL_INLINE_THROW(uint32_t)
1673iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1674 IEMMODE enmEffOpSize, uint8_t idxInstr)
1675{
1676 RT_NOREF(cbInstr);
1677
1678#ifdef VBOX_STRICT
1679 /*
1680 * Check that the fExec flags we've got make sense.
1681 */
1682 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1683#endif
1684
1685 /*
1686 * To keep things simple we have to commit any pending writes first as we
1687 * may end up making calls.
1688 */
1689 off = iemNativeRegFlushPendingWrites(pReNative, off);
1690
1691 /*
1692 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1693 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1694 * directly as the effective stack pointer.
1695 * (Code structure is very similar to that of PUSH)
1696 *
1697 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1698 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1699 * aren't commonly used (or useful) and thus not in need of optimizing.
1700 *
1701 * Note! For non flat modes the guest RSP is not allocated for update but rather for calculation
1702 * as the shadowed register would remain modified even if the return address throws a \#GP(0)
1703 * due to being outside the CS limit causing a wrong stack pointer value in the guest (see
1704 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transfered
1705 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1706 */
1707 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1708 ? sizeof(uint64_t)
1709 : enmEffOpSize == IEMMODE_32BIT
1710 ? sizeof(uint32_t)
1711 : sizeof(uint16_t);
1712 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1713 uintptr_t const pfnFunction = fFlat
1714 ? enmEffOpSize == IEMMODE_64BIT
1715 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1716 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1717 : enmEffOpSize == IEMMODE_32BIT
1718 ? (uintptr_t)iemNativeHlpStackFetchU32
1719 : (uintptr_t)iemNativeHlpStackFetchU16;
1720 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1721 fFlat ? kIemNativeGstRegUse_ForUpdate : kIemNativeGstRegUse_Calculation,
1722 true /*fNoVolatileRegs*/);
1723 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1724 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1725 * will be the resulting register value. */
1726 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1727
1728 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1729 if (fFlat)
1730 Assert(idxRegEffSp == idxRegRsp);
1731 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1732 {
1733 Assert(idxRegEffSp != idxRegRsp);
1734 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1735 kIemNativeGstRegUse_ReadOnly);
1736#ifdef RT_ARCH_AMD64
1737 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1738#else
1739 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1740#endif
1741 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1742 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1743 offFixupJumpToUseOtherBitSp = off;
1744 if (enmEffOpSize == IEMMODE_32BIT)
1745 {
1746 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1747 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1748 }
1749 else
1750 {
1751 Assert(enmEffOpSize == IEMMODE_16BIT);
1752 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1753 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1754 idxRegMemResult);
1755 }
1756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1757 }
1758 /* SpUpdateEnd: */
1759 uint32_t const offLabelSpUpdateEnd = off;
1760
1761 /*
1762 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1763 * we're skipping lookup).
1764 */
1765 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1766 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1767 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1768 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1769 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1770 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1771 : UINT32_MAX;
1772
1773 if (!TlbState.fSkip)
1774 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1775 else
1776 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1777
1778 /*
1779 * Use16BitSp:
1780 */
1781 if (!fFlat)
1782 {
1783#ifdef RT_ARCH_AMD64
1784 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1785#else
1786 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1787#endif
1788 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1789 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1790 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1791 idxRegMemResult);
1792 else
1793 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1794 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1795 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1796 }
1797
1798 /*
1799 * TlbMiss:
1800 *
1801 * Call helper to do the fetching.
1802 */
1803 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1804
1805#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1806 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1807#else
1808 RT_NOREF(idxInstr);
1809#endif
1810
1811 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1812 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1813 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1814 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1815
1816
1817 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1818 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1819 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1820
1821 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1822 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1823
1824 /* Done setting up parameters, make the call. */
1825 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1826
1827 /* Move the return register content to idxRegMemResult. */
1828 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1829 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1830
1831 /* Restore variables and guest shadow registers to volatile registers. */
1832 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1833 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1834
1835#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1836 if (!TlbState.fSkip)
1837 {
1838 /* end of TlbMiss - Jump to the done label. */
1839 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1840 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1841
1842 /*
1843 * TlbLookup:
1844 */
1845 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1846 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1847
1848 /*
1849 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1850 */
1851 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1852# ifdef IEM_WITH_TLB_STATISTICS
1853 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1854 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1855# endif
1856 switch (cbMem)
1857 {
1858 case 2:
1859 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1860 break;
1861 case 4:
1862 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1863 break;
1864 case 8:
1865 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1866 break;
1867 default:
1868 AssertFailed();
1869 }
1870
1871 TlbState.freeRegsAndReleaseVars(pReNative);
1872
1873 /*
1874 * TlbDone:
1875 *
1876 * Set the new RSP value (FLAT accesses needs to calculate it first) and
1877 * commit the popped register value.
1878 */
1879 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1880 }
1881#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1882
1883 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1884 if (!f64Bit)
1885/** @todo we can skip this test in FLAT 32-bit mode. */
1886 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1887 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1888 else if (enmEffOpSize == IEMMODE_64BIT)
1889 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1890
1891 /* Complete RSP calculation for FLAT mode. */
1892 if (idxRegEffSp == idxRegRsp)
1893 {
1894 if (enmEffOpSize == IEMMODE_64BIT)
1895 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1896 else
1897 {
1898 Assert(enmEffOpSize == IEMMODE_32BIT);
1899 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1900 }
1901 }
1902
1903 /* Commit the result and clear any current guest shadows for RIP. */
1904 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1905 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1906 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1907
1908 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1909 if (!fFlat)
1910 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1911
1912 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1913 if (idxRegEffSp != idxRegRsp)
1914 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1915 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1916 return off;
1917}
1918
1919
1920/*********************************************************************************************************************************
1921* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1922*********************************************************************************************************************************/
1923
1924#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1925 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1926
1927/**
1928 * Emits code to check if a \#NM exception should be raised.
1929 *
1930 * @returns New code buffer offset, UINT32_MAX on failure.
1931 * @param pReNative The native recompile state.
1932 * @param off The code buffer offset.
1933 * @param idxInstr The current instruction.
1934 */
1935DECL_INLINE_THROW(uint32_t)
1936iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1937{
1938#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1939 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1940
1941 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1942 {
1943#endif
1944 /*
1945 * Make sure we don't have any outstanding guest register writes as we may
1946 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1947 */
1948 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1949 off = iemNativeRegFlushPendingWrites(pReNative, off);
1950
1951#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1952 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1953#else
1954 RT_NOREF(idxInstr);
1955#endif
1956
1957 /* Allocate a temporary CR0 register. */
1958 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
1959
1960 /*
1961 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
1962 * return raisexcpt();
1963 */
1964 /* Test and jump. */
1965 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, kIemNativeExitReason_RaiseNm);
1966
1967 /* Free but don't flush the CR0 register. */
1968 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1969
1970#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1971 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1972 }
1973 else
1974 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1975#endif
1976
1977 return off;
1978}
1979
1980
1981#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1982 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1983
1984/**
1985 * Emits code to check if a \#NM exception should be raised.
1986 *
1987 * @returns New code buffer offset, UINT32_MAX on failure.
1988 * @param pReNative The native recompile state.
1989 * @param off The code buffer offset.
1990 * @param idxInstr The current instruction.
1991 */
1992DECL_INLINE_THROW(uint32_t)
1993iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1994{
1995#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1996 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
1997
1998 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
1999 {
2000#endif
2001 /*
2002 * Make sure we don't have any outstanding guest register writes as we may
2003 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2004 */
2005 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2006 off = iemNativeRegFlushPendingWrites(pReNative, off);
2007
2008#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2009 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2010#else
2011 RT_NOREF(idxInstr);
2012#endif
2013
2014 /* Allocate a temporary CR0 register. */
2015 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_Calculation);
2016
2017 /*
2018 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2019 * return raisexcpt();
2020 */
2021 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2022 /* Test and jump. */
2023 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS, kIemNativeExitReason_RaiseNm);
2024
2025 /* Free the CR0 register. */
2026 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2027
2028#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2029 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2030 }
2031 else
2032 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2033#endif
2034
2035 return off;
2036}
2037
2038
2039#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2040 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2041
2042/**
2043 * Emits code to check if a \#MF exception should be raised.
2044 *
2045 * @returns New code buffer offset, UINT32_MAX on failure.
2046 * @param pReNative The native recompile state.
2047 * @param off The code buffer offset.
2048 * @param idxInstr The current instruction.
2049 */
2050DECL_INLINE_THROW(uint32_t)
2051iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2052{
2053 /*
2054 * Make sure we don't have any outstanding guest register writes as we may
2055 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2056 */
2057 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2058 off = iemNativeRegFlushPendingWrites(pReNative, off);
2059
2060#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2061 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2062#else
2063 RT_NOREF(idxInstr);
2064#endif
2065
2066 /* Allocate a temporary FSW register. */
2067 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
2068
2069 /*
2070 * if ((FSW & X86_FSW_ES) != 0)
2071 * return raisexcpt();
2072 */
2073 /* Test and jump. */
2074 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeExitReason_RaiseMf);
2075
2076 /* Free but don't flush the FSW register. */
2077 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2078
2079 return off;
2080}
2081
2082
2083#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2084 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2085
2086/**
2087 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2088 *
2089 * @returns New code buffer offset, UINT32_MAX on failure.
2090 * @param pReNative The native recompile state.
2091 * @param off The code buffer offset.
2092 * @param idxInstr The current instruction.
2093 */
2094DECL_INLINE_THROW(uint32_t)
2095iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2096{
2097#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2098 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2099
2100 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2101 {
2102#endif
2103 /*
2104 * Make sure we don't have any outstanding guest register writes as we may
2105 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2106 */
2107 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2108 off = iemNativeRegFlushPendingWrites(pReNative, off);
2109
2110#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2111 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2112#else
2113 RT_NOREF(idxInstr);
2114#endif
2115
2116 /* Allocate a temporary CR0 and CR4 register. */
2117 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2118 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2119 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2120
2121 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2122#ifdef RT_ARCH_AMD64
2123 /*
2124 * We do a modified test here:
2125 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2126 * else { goto RaiseSseRelated; }
2127 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2128 * all targets except the 386, which doesn't support SSE anyway, so this
2129 * should be a safe assumption.
2130 */
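 /* Worked example (usual layout: CR0.EM=bit 2, CR0.TS=bit 3, CR4.OSFXSR=bit 9):
    with CR4.OSFXSR=1 and CR0.EM=CR0.TS=0 the masked combination is 0x200 and the
    final XOR yields zero, so we stay on the likely path; with CR0.TS=1 it yields
    0x008 and we exit to RaiseSseRelated. */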
2131 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2132 //pCodeBuf[off++] = 0xcc;
2133 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2134 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2135 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2136 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2137 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2138 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeExitReason_RaiseSseRelated, kIemNativeInstrCond_ne);
2139
2140#elif defined(RT_ARCH_ARM64)
2141 /*
2142 * We do a modified test here:
2143 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2144 * else { goto RaiseSseRelated; }
2145 */
2146 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2147 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2148 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2149 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2150 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2151 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2152 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2153 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2154 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2155 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2156 kIemNativeExitReason_RaiseSseRelated);
2157
2158#else
2159# error "Port me!"
2160#endif
2161
2162 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2163 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2164 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2165 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2166
2167#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2168 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2169 }
2170 else
2171 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2172#endif
2173
2174 return off;
2175}
2176
2177
2178#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2179 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2180
2181/**
2182 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2183 *
2184 * @returns New code buffer offset, UINT32_MAX on failure.
2185 * @param pReNative The native recompile state.
2186 * @param off The code buffer offset.
2187 * @param idxInstr The current instruction.
2188 */
2189DECL_INLINE_THROW(uint32_t)
2190iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2191{
2192#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2193 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2194
2195 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2196 {
2197#endif
2198 /*
2199 * Make sure we don't have any outstanding guest register writes as we may
2200 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2201 */
2202 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2203 off = iemNativeRegFlushPendingWrites(pReNative, off);
2204
2205#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2206 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2207#else
2208 RT_NOREF(idxInstr);
2209#endif
2210
2211 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2212 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2213 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2214 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2215 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2216
2217 /*
2218 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2219 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2220 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2221 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2222 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2223 * { likely }
2224 * else { goto RaiseAvxRelated; }
2225 */
2226#ifdef RT_ARCH_AMD64
2227 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2228 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2229 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2230 ^ 0x1a) ) { likely }
2231 else { goto RaiseAvxRelated; } */
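 /* Worked example: with XCR0.SSE=XCR0.YMM=1, CR4.OSXSAVE=1 and CR0.TS=0 the two
    rcl-via-carry steps below leave 0b11010 (0x1a) in the temporary register, the
    final XOR clears it and we stay on the likely path; any missing enable bit or
    a set CR0.TS produces a non-zero value and we exit to RaiseAvxRelated. */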
2232 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2233 //pCodeBuf[off++] = 0xcc;
2234 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2235 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2236 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2237 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2238 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2239 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2240 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2241 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2242 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2243 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2244 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeExitReason_RaiseAvxRelated, kIemNativeInstrCond_ne);
2245
2246#elif defined(RT_ARCH_ARM64)
2247 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2248 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2249 else { goto RaiseAvxRelated; } */
2250 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2251 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2252 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2253 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2254 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2255 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2256 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2257 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2258 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2259 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2260 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2261 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2262 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2263 kIemNativeExitReason_RaiseAvxRelated);
2264
2265#else
2266# error "Port me!"
2267#endif
2268
2269 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2270 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2271 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2272 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2273#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2274 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2275 }
2276 else
2277 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2278#endif
2279
2280 return off;
2281}
2282
2283
2284#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2285#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
2286 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
2287
2288/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
2289DECL_INLINE_THROW(uint32_t)
2290iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2291{
2292 /*
2293 * Make sure we don't have any outstanding guest register writes as we may
2294 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
2295 */
2296 off = iemNativeRegFlushPendingWrites(pReNative, off);
2297
2298#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2299 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2300#else
2301 RT_NOREF(idxInstr);
2302#endif
2303
2304 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
2305 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2306
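 /* The sequence below isolates the pending-and-unmasked exception bits: the mask
    bits (MXCSR[12:7]) are shifted down onto the flag bits (MXCSR[5:0]), inverted
    and ANDed with MXCSR. E.g. MXCSR=0x1f81 (all masked, IE pending) gives zero,
    while MXCSR=0x1f01 (IM clear, IE pending) leaves bit 0 set and we exit below. */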
2307 /* mov tmp, varmxcsr */
2308 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2309 /* tmp &= X86_MXCSR_XCPT_MASK */
2310 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
2311 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
2312 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
2313 /* tmp = ~tmp */
2314 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
2315 /* tmp &= mxcsr */
2316 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2317 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
2318 kIemNativeExitReason_RaiseSseAvxFpRelated);
2319
2320 /* Free but don't flush the MXCSR register. */
2321 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
2322 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2323
2324 return off;
2325}
2326#endif
2327
2328
2329#define IEM_MC_RAISE_DIVIDE_ERROR() \
2330 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2331
2332/**
2333 * Emits code to raise a \#DE.
2334 *
2335 * @returns New code buffer offset, UINT32_MAX on failure.
2336 * @param pReNative The native recompile state.
2337 * @param off The code buffer offset.
2338 * @param idxInstr The current instruction.
2339 */
2340DECL_INLINE_THROW(uint32_t)
2341iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2342{
2343 /*
2344 * Make sure we don't have any outstanding guest register writes as we may
2345 * raise a #DE and all guest registers must be up to date in CPUMCTX. */
2346 off = iemNativeRegFlushPendingWrites(pReNative, off);
2347
2348#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2349 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2350#else
2351 RT_NOREF(idxInstr);
2352#endif
2353
2354 /* raise \#DE exception unconditionally. */
2355 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_RaiseDe);
2356}
2357
2358
2359#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2360 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2361
2362/**
2363 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2364 *
2365 * @returns New code buffer offset, UINT32_MAX on failure.
2366 * @param pReNative The native recompile state.
2367 * @param off The code buffer offset.
2368 * @param idxInstr The current instruction.
2369 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2370 * @param cbAlign The alignment in bytes to check against.
2371 */
2372DECL_INLINE_THROW(uint32_t)
2373iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
2374{
2375 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2376 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2377
2378 /*
2379 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2380 */
2381 off = iemNativeRegFlushPendingWrites(pReNative, off);
2382
2383#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2384 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2385#else
2386 RT_NOREF(idxInstr);
2387#endif
2388
2389 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2390
2391 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2392 kIemNativeExitReason_RaiseGp0);
2393
2394 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2395 return off;
2396}
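

/*
 * For reference, the test emitted above is the usual power-of-two alignment
 * check, roughly equivalent to this C sketch (illustrative only, never built;
 * iemSketchIsEffAddrMisaligned is a hypothetical helper name):
 */
#if 0
DECLINLINE(bool) iemSketchIsEffAddrMisaligned(RTGCPTR GCPtrEff, uint8_t cbAlign)
{
    /* cbAlign is a power of two, so any set bit below it means the address is
       misaligned and the emitted code exits the TB to raise \#GP(0). */
    return (GCPtrEff & (cbAlign - 1)) != 0;
}
#endif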
2397
2398
2399/*********************************************************************************************************************************
2400* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2401*********************************************************************************************************************************/
2402
2403/**
2404 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2405 *
2406 * @returns Pointer to the condition stack entry.
2407 * @throws  VERR_IEM_COND_TOO_DEEPLY_NESTED if the conditionals are nested too deeply.
2408 */
2409DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2410{
2411#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2412 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2413#endif
2414
2415 uint32_t const idxStack = pReNative->cCondDepth;
2416 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2417
2418 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2419 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2420
2421 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2422 pEntry->fInElse = false;
2423 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2424 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2425
2426 return pEntry;
2427}
2428
2429
2430/**
2431 * Start of the if-block, snapshotting the register and variable state.
2432 */
2433DECL_INLINE_THROW(void)
2434iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2435{
2436 Assert(offIfBlock != UINT32_MAX);
2437 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2438 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2439 Assert(!pEntry->fInElse);
2440
2441    /* Define the start of the IF block if requested or for disassembly purposes. */
2442 if (idxLabelIf != UINT32_MAX)
2443 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2444#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2445 else
2446 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2447#else
2448 RT_NOREF(offIfBlock);
2449#endif
2450
2451#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2452 Assert(pReNative->Core.offPc == 0);
2453#endif
2454
2455 /* Copy the initial state so we can restore it in the 'else' block. */
2456 pEntry->InitialState = pReNative->Core;
2457}
2458
2459
2460#define IEM_MC_ELSE() } while (0); \
2461 off = iemNativeEmitElse(pReNative, off); \
2462 do {
2463
2464/** Emits code related to IEM_MC_ELSE. */
2465DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2466{
2467 /* Check sanity and get the conditional stack entry. */
2468 Assert(off != UINT32_MAX);
2469 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2470 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2471 Assert(!pEntry->fInElse);
2472
2473#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2474 /* Writeback any dirty shadow registers. */
2475 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2476 * in one of the branches and leave guest registers already dirty before the start of the if
2477 * block alone. */
2478 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2479#endif
2480
2481 /* Jump to the endif */
2482 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2483
2484 /* Define the else label and enter the else part of the condition. */
2485 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2486 pEntry->fInElse = true;
2487
2488#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2489 Assert(pReNative->Core.offPc == 0);
2490#endif
2491
2492 /* Snapshot the core state so we can do a merge at the endif and restore
2493 the snapshot we took at the start of the if-block. */
2494 pEntry->IfFinalState = pReNative->Core;
2495 pReNative->Core = pEntry->InitialState;
2496
2497 return off;
2498}
2499
2500
2501#define IEM_MC_ENDIF() } while (0); \
2502 off = iemNativeEmitEndIf(pReNative, off)
2503
2504/** Emits code related to IEM_MC_ENDIF. */
2505DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2506{
2507 /* Check sanity and get the conditional stack entry. */
2508 Assert(off != UINT32_MAX);
2509 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2510 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2511
2512#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2513 Assert(pReNative->Core.offPc == 0);
2514#endif
2515#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2516 /* Writeback any dirty shadow registers (else branch). */
2517 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2518 * in one of the branches and leave guest registers already dirty before the start of the if
2519 * block alone. */
2520 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2521#endif
2522
2523 /*
2524     * Now we have to find common ground with the core state at the end of the
2525     * if-block. Use the smallest common denominator and just drop anything
2526 * that isn't the same in both states.
2527 */
2528 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2529 * which is why we're doing this at the end of the else-block.
2530     *        But we'd need more info about the future for that to be worth the effort. */
2531 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2532#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2533 Assert( pOther->bmGstRegShadowDirty == 0
2534 && pReNative->Core.bmGstRegShadowDirty == 0);
2535#endif
2536
2537 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2538 {
2539 /* shadow guest stuff first. */
2540 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2541 if (fGstRegs)
2542 {
2543 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2544 do
2545 {
2546 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2547 fGstRegs &= ~RT_BIT_64(idxGstReg);
2548
2549 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2550 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2551 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2552 {
2553 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2554 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2555
2556#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2557 /* Writeback any dirty shadow registers we are about to unshadow. */
2558 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2559#endif
2560 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2561 }
2562 } while (fGstRegs);
2563 }
2564 else
2565 {
2566 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2567#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2568 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2569#endif
2570 }
2571
2572 /* Check variables next. For now we must require them to be identical
2573 or stuff we can recreate. */
2574 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2575 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2576 if (fVars)
2577 {
2578 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2579 do
2580 {
2581 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2582 fVars &= ~RT_BIT_32(idxVar);
2583
2584 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2585 {
2586 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2587 continue;
2588 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2589 {
2590 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2591 if (idxHstReg != UINT8_MAX)
2592 {
2593 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2594 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2595 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2596 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2597 }
2598 continue;
2599 }
2600 }
2601 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2602 continue;
2603
2604 /* Irreconcilable, so drop it. */
2605 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2606 if (idxHstReg != UINT8_MAX)
2607 {
2608 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2609 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2610 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2611 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2612 }
2613 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2614 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2615 } while (fVars);
2616 }
2617
2618 /* Finally, check that the host register allocations matches. */
2619 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2620 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2621 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2622 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2623 }
2624
2625 /*
2626 * Define the endif label and maybe the else one if we're still in the 'if' part.
2627 */
2628 if (!pEntry->fInElse)
2629 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2630 else
2631 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2632 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2633
2634    /* Pop the conditional stack. */
2635 pReNative->cCondDepth -= 1;
2636
2637 return off;
2638}
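

/*
 * Putting the three emitters together: inside a generated function body the
 * IEM_MC_IF_XXX, IEM_MC_ELSE and IEM_MC_ENDIF macros expand roughly like this
 * (illustrative sketch of the macro expansion, using IEM_MC_IF_EFL_BIT_SET as
 * the example condition):
 *
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, a_fBit);
 *      do {
 *          ... emitters for the if-block ...
 *      } while (0);
 *      off = iemNativeEmitElse(pReNative, off);
 *      do {
 *          ... emitters for the else-block ...
 *      } while (0);
 *      off = iemNativeEmitEndIf(pReNative, off);
 *
 * The do/while pairs only provide C-level scoping, while the condition stack
 * entry pushed by iemNativeCondPushIf() carries the register and variable
 * state needed to reconcile the two branches at the endif.
 */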
2639
2640
2641#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2642 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2643 do {
2644
2645/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2646DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2647{
2648 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2649 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2650
2651 /* Get the eflags. */
2652 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2653 kIemNativeGstRegUse_ReadOnly);
2654
2655 /* Test and jump. */
2656 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2657
2658 /* Free but don't flush the EFlags register. */
2659 iemNativeRegFreeTmp(pReNative, idxEflReg);
2660
2661 /* Make a copy of the core state now as we start the if-block. */
2662 iemNativeCondStartIfBlock(pReNative, off);
2663
2664 return off;
2665}
2666
2667
2668#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2669 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2670 do {
2671
2672/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2673DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2674{
2675 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2676 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2677
2678 /* Get the eflags. */
2679 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2680 kIemNativeGstRegUse_ReadOnly);
2681
2682 /* Test and jump. */
2683 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2684
2685 /* Free but don't flush the EFlags register. */
2686 iemNativeRegFreeTmp(pReNative, idxEflReg);
2687
2688 /* Make a copy of the core state now as we start the if-block. */
2689 iemNativeCondStartIfBlock(pReNative, off);
2690
2691 return off;
2692}
2693
2694
2695#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2696 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2697 do {
2698
2699/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2700DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2701{
2702 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2703 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2704
2705 /* Get the eflags. */
2706 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2707 kIemNativeGstRegUse_ReadOnly);
2708
2709 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2710 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2711
2712 /* Test and jump. */
2713 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2714
2715 /* Free but don't flush the EFlags register. */
2716 iemNativeRegFreeTmp(pReNative, idxEflReg);
2717
2718 /* Make a copy of the core state now as we start the if-block. */
2719 iemNativeCondStartIfBlock(pReNative, off);
2720
2721 return off;
2722}
2723
2724
2725#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2726 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2727 do {
2728
2729/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2730DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2731{
2732 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2733 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2734
2735 /* Get the eflags. */
2736 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2737 kIemNativeGstRegUse_ReadOnly);
2738
2739 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2740 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2741
2742 /* Test and jump. */
2743 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2744
2745 /* Free but don't flush the EFlags register. */
2746 iemNativeRegFreeTmp(pReNative, idxEflReg);
2747
2748 /* Make a copy of the core state now as we start the if-block. */
2749 iemNativeCondStartIfBlock(pReNative, off);
2750
2751 return off;
2752}
2753
2754
2755#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2756 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2757 do {
2758
2759#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2760 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2761 do {
2762
2763/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2764DECL_INLINE_THROW(uint32_t)
2765iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2766 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2767{
2768 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2769 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2770
2771 /* Get the eflags. */
2772 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2773 kIemNativeGstRegUse_ReadOnly);
2774
2775 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2776 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2777
2778 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2779 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2780 Assert(iBitNo1 != iBitNo2);
2781
2782#ifdef RT_ARCH_AMD64
2783 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2784
2785 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2786 if (iBitNo1 > iBitNo2)
2787 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2788 else
2789 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2790 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2791
2792#elif defined(RT_ARCH_ARM64)
2793 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2794 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2795
2796 /* and tmpreg, eflreg, #1<<iBitNo1 */
2797 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2798
2799 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2800 if (iBitNo1 > iBitNo2)
2801 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2802 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2803 else
2804 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2805 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2806
2807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2808
2809#else
2810# error "Port me"
2811#endif
2812
2813 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2814 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2815 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2816
2817 /* Free but don't flush the EFlags and tmp registers. */
2818 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2819 iemNativeRegFreeTmp(pReNative, idxEflReg);
2820
2821 /* Make a copy of the core state now as we start the if-block. */
2822 iemNativeCondStartIfBlock(pReNative, off);
2823
2824 return off;
2825}
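

/*
 * For reference, the AND+shift+XOR sequence above leaves bit iBitNo2 of the
 * temporary register set exactly when the two EFLAGS bits differ, so the
 * predicate being tested is the one in this C sketch (illustrative only,
 * never built; iemSketchEflBitsEqual is a hypothetical helper name):
 */
#if 0
DECLINLINE(bool) iemSketchEflBitsEqual(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
{
    return ((fEfl >> iBitNo1) & 1) == ((fEfl >> iBitNo2) & 1);
}
#endif
/* IEM_MC_IF_EFL_BITS_EQ enters the if-block when the sketch above yields true,
   IEM_MC_IF_EFL_BITS_NE when it yields false (the fInverted parameter). */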
2826
2827
2828#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2829 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2830 do {
2831
2832#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2833 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2834 do {
2835
2836/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2837 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2838DECL_INLINE_THROW(uint32_t)
2839iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2840 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2841{
2842 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2843 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2844
2845    /* We need an if-block label for the inverted variant. */
2846 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2847 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2848
2849 /* Get the eflags. */
2850 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2851 kIemNativeGstRegUse_ReadOnly);
2852
2853 /* Translate the flag masks to bit numbers. */
2854 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2855 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2856
2857 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2858 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2859 Assert(iBitNo1 != iBitNo);
2860
2861 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2862 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2863 Assert(iBitNo2 != iBitNo);
2864 Assert(iBitNo2 != iBitNo1);
2865
2866#ifdef RT_ARCH_AMD64
2867 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2868#elif defined(RT_ARCH_ARM64)
2869 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2870#endif
2871
2872 /* Check for the lone bit first. */
2873 if (!fInverted)
2874 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2875 else
2876 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2877
2878 /* Then extract and compare the other two bits. */
2879#ifdef RT_ARCH_AMD64
2880 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2881 if (iBitNo1 > iBitNo2)
2882 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2883 else
2884 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2885 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2886
2887#elif defined(RT_ARCH_ARM64)
2888 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2889
2890 /* and tmpreg, eflreg, #1<<iBitNo1 */
2891 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2892
2893 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2894 if (iBitNo1 > iBitNo2)
2895 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2896 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2897 else
2898 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2899 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2900
2901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2902
2903#else
2904# error "Port me"
2905#endif
2906
2907 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2908 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2909 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2910
2911 /* Free but don't flush the EFlags and tmp registers. */
2912 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2913 iemNativeRegFreeTmp(pReNative, idxEflReg);
2914
2915 /* Make a copy of the core state now as we start the if-block. */
2916 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2917
2918 return off;
2919}
2920
2921
2922#define IEM_MC_IF_CX_IS_NZ() \
2923 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2924 do {
2925
2926/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2927DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2928{
2929 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2930
2931 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2932 kIemNativeGstRegUse_ReadOnly);
2933 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2934 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2935
2936 iemNativeCondStartIfBlock(pReNative, off);
2937 return off;
2938}
2939
2940
2941#define IEM_MC_IF_ECX_IS_NZ() \
2942 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2943 do {
2944
2945#define IEM_MC_IF_RCX_IS_NZ() \
2946 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2947 do {
2948
2949/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2950DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2951{
2952 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2953
2954 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2955 kIemNativeGstRegUse_ReadOnly);
2956 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2957 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2958
2959 iemNativeCondStartIfBlock(pReNative, off);
2960 return off;
2961}
2962
2963
2964#define IEM_MC_IF_CX_IS_NOT_ONE() \
2965 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2966 do {
2967
2968/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2969DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2970{
2971 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2972
2973 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2974 kIemNativeGstRegUse_ReadOnly);
2975#ifdef RT_ARCH_AMD64
2976 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2977#else
2978 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2979 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2980 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2981#endif
2982 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2983
2984 iemNativeCondStartIfBlock(pReNative, off);
2985 return off;
2986}
2987
2988
2989#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2990 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2991 do {
2992
2993#define IEM_MC_IF_RCX_IS_NOT_ONE() \
2994 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
2995 do {
2996
2997/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
2998DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2999{
3000 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3001
3002 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3003 kIemNativeGstRegUse_ReadOnly);
3004 if (f64Bit)
3005 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3006 else
3007 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3008 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3009
3010 iemNativeCondStartIfBlock(pReNative, off);
3011 return off;
3012}
3013
3014
3015#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3016 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3017 do {
3018
3019#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3020 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3021 do {
3022
3023/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3024 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3025DECL_INLINE_THROW(uint32_t)
3026iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3027{
3028 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3029 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3030
3031 /* We have to load both RCX and EFLAGS before we can start branching,
3032 otherwise we'll end up in the else-block with an inconsistent
3033 register allocator state.
3034 Doing EFLAGS first as it's more likely to be loaded, right? */
3035 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3036 kIemNativeGstRegUse_ReadOnly);
3037 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3038 kIemNativeGstRegUse_ReadOnly);
3039
3040 /** @todo we could reduce this to a single branch instruction by spending a
3041 * temporary register and some setnz stuff. Not sure if loops are
3042 * worth it. */
3043 /* Check CX. */
3044#ifdef RT_ARCH_AMD64
3045 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3046#else
3047 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3048 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3049 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3050#endif
3051
3052 /* Check the EFlags bit. */
3053 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3054 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3055 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3056 !fCheckIfSet /*fJmpIfSet*/);
3057
3058 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3059 iemNativeRegFreeTmp(pReNative, idxEflReg);
3060
3061 iemNativeCondStartIfBlock(pReNative, off);
3062 return off;
3063}
3064
3065
3066#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3067 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3068 do {
3069
3070#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3071 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3072 do {
3073
3074#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3075 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3076 do {
3077
3078#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3079 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3080 do {
3081
3082/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3083 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3084 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3085 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3086DECL_INLINE_THROW(uint32_t)
3087iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3088 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3089{
3090 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3091 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3092
3093 /* We have to load both RCX and EFLAGS before we can start branching,
3094 otherwise we'll end up in the else-block with an inconsistent
3095 register allocator state.
3096 Doing EFLAGS first as it's more likely to be loaded, right? */
3097 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3098 kIemNativeGstRegUse_ReadOnly);
3099 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3100 kIemNativeGstRegUse_ReadOnly);
3101
3102 /** @todo we could reduce this to a single branch instruction by spending a
3103 * temporary register and some setnz stuff. Not sure if loops are
3104 * worth it. */
3105 /* Check RCX/ECX. */
3106 if (f64Bit)
3107 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3108 else
3109 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3110
3111 /* Check the EFlags bit. */
3112 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3113 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3114 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3115 !fCheckIfSet /*fJmpIfSet*/);
3116
3117 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3118 iemNativeRegFreeTmp(pReNative, idxEflReg);
3119
3120 iemNativeCondStartIfBlock(pReNative, off);
3121 return off;
3122}
3123
3124
3125#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3126 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3127 do {
3128
3129/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3130DECL_INLINE_THROW(uint32_t)
3131iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3132{
3133 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3134
3135 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3136 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3137 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3138 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3139
3140 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3141
3142 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3143
3144 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3145
3146 iemNativeCondStartIfBlock(pReNative, off);
3147 return off;
3148}
3149
3150
3151#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3152 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3153 do {
3154
3155/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3156DECL_INLINE_THROW(uint32_t)
3157iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3158{
3159 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3160 Assert(iGReg < 16);
3161
3162 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3163 kIemNativeGstRegUse_ReadOnly);
3164
3165 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3166
3167 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3168
3169 iemNativeCondStartIfBlock(pReNative, off);
3170 return off;
3171}
3172
3173
3174
3175/*********************************************************************************************************************************
3176* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3177*********************************************************************************************************************************/
3178
3179#define IEM_MC_NOREF(a_Name) \
3180 RT_NOREF_PV(a_Name)
3181
3182#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3183 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3184
3185#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3186 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3187
3188#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3189 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3190
3191#define IEM_MC_LOCAL(a_Type, a_Name) \
3192 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3193
3194#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3195 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3196
3197#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3198 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3199
3200
3201/**
3202 * Sets the host register for @a idxVar to @a idxReg.
3203 *
3204 * The register must not be allocated. Any guest register shadowing will be
3205 * implicitly dropped by this call.
3206 *
3207 * The variable must not have any register associated with it (causes
3208 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3209 * implied.
3210 *
3211 * @returns idxReg
3212 * @param pReNative The recompiler state.
3213 * @param idxVar The variable.
3214 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3215 * @param off For recording in debug info.
3216 *
3217 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3218 */
3219DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3220{
3221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3222 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3223 Assert(!pVar->fRegAcquired);
3224 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3225 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3226 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3227
3228 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3229 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3230
3231 iemNativeVarSetKindToStack(pReNative, idxVar);
3232 pVar->idxReg = idxReg;
3233
3234 return idxReg;
3235}
3236
3237
3238/**
3239 * A convenient helper function.
3240 */
3241DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3242 uint8_t idxReg, uint32_t *poff)
3243{
3244 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3245 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3246 return idxReg;
3247}
3248
3249
3250/**
3251 * This is called by IEM_MC_END() to clean up all variables.
3252 */
3253DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3254{
3255 uint32_t const bmVars = pReNative->Core.bmVars;
3256 if (bmVars != 0)
3257 iemNativeVarFreeAllSlow(pReNative, bmVars);
3258 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3259 Assert(pReNative->Core.bmStack == 0);
3260}
3261
3262
3263#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3264
3265/**
3266 * This is called by IEM_MC_FREE_LOCAL.
3267 */
3268DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3269{
3270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3271 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3272 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3273}
3274
3275
3276#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3277
3278/**
3279 * This is called by IEM_MC_FREE_ARG.
3280 */
3281DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3282{
3283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3284 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3285 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3286}
3287
3288
3289#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3290
3291/**
3292 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3293 */
3294DECL_INLINE_THROW(uint32_t)
3295iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3296{
3297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3298 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3299 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3300 Assert( pVarDst->cbVar == sizeof(uint16_t)
3301 || pVarDst->cbVar == sizeof(uint32_t));
3302
3303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3304 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3305 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3306 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3307 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3308
3309 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3310
3311 /*
3312 * Special case for immediates.
3313 */
3314 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3315 {
3316 switch (pVarDst->cbVar)
3317 {
3318 case sizeof(uint16_t):
3319 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3320 break;
3321 case sizeof(uint32_t):
3322 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3323 break;
3324 default: AssertFailed(); break;
3325 }
3326 }
3327 else
3328 {
3329 /*
3330 * The generic solution for now.
3331 */
3332 /** @todo optimize this by having the python script make sure the source
3333 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3334 * statement. Then we could just transfer the register assignments. */
3335 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3336 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3337 switch (pVarDst->cbVar)
3338 {
3339 case sizeof(uint16_t):
3340 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3341 break;
3342 case sizeof(uint32_t):
3343 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3344 break;
3345 default: AssertFailed(); break;
3346 }
3347 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3348 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3349 }
3350 return off;
3351}
3352
3353
3354
3355/*********************************************************************************************************************************
3356* Emitters for IEM_MC_CALL_CIMPL_XXX *
3357*********************************************************************************************************************************/
3358
3359/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3360DECL_INLINE_THROW(uint32_t)
3361iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3362 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3363
3364{
3365 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3366
3367#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3368 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3369       when a call clobbers any of the relevant control registers. */
3370# if 1
3371 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3372 {
3373 /* Likely as long as call+ret are done via cimpl. */
3374 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3375 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3376 }
3377 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3378 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3379 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3380 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3381 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3382 else
3383 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3384 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3385 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3386
3387# else
3388 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3389 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3390 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3391 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3392 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3393 || pfnCImpl == (uintptr_t)iemCImpl_callf
3394 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3395 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3396 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3397 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3398 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3399# endif
3400#endif
3401
3402 /*
3403 * Do all the call setup and cleanup.
3404 */
3405 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3406
3407 /*
3408 * Load the two or three hidden arguments.
3409 */
3410#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3411 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3412 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3413 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3414#else
3415 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3416 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3417#endif
3418
3419 /*
3420 * Make the call and check the return code.
3421 *
3422 * Shadow PC copies are always flushed here, other stuff depends on flags.
3423     * Segment and general purpose registers are explicitly flushed via the
3424 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3425 * macros.
3426 */
3427 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3428#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3429 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3430#endif
3431 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3432 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3433 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3434 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3435
3436 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3437}
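

/*
 * For reference, the call emitted by the common worker above boils down to the
 * following sketch (illustrative only; the CImpl signature shown is assumed
 * from the hidden-argument loading above, and on the Windows/AMD64 strict-rc
 * configuration rcStrict is instead returned via a shadow stack slot):
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 *
 * iemNativeEmitCheckCallRetAndPassUp() then emits the code that inspects the
 * returned status and leaves the translation block on anything but success.
 */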
3438
3439
3440#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3441 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3442
3443/** Emits code for IEM_MC_CALL_CIMPL_1. */
3444DECL_INLINE_THROW(uint32_t)
3445iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3446 uintptr_t pfnCImpl, uint8_t idxArg0)
3447{
3448 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3449 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3450}
3451
3452
3453#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3454 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3455
3456/** Emits code for IEM_MC_CALL_CIMPL_2. */
3457DECL_INLINE_THROW(uint32_t)
3458iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3459 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3460{
3461 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3462 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3463 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3464}
3465
3466
3467#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3468 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3469 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3470
3471/** Emits code for IEM_MC_CALL_CIMPL_3. */
3472DECL_INLINE_THROW(uint32_t)
3473iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3474 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3475{
3476 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3477 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3478 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3479 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3480}
3481
3482
3483#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3484 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3485 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3486
3487/** Emits code for IEM_MC_CALL_CIMPL_4. */
3488DECL_INLINE_THROW(uint32_t)
3489iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3490 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3491{
3492 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3493 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3496 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3497}
3498
3499
3500#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3501 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3502 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3503
3504/** Emits code for IEM_MC_CALL_CIMPL_5. */
3505DECL_INLINE_THROW(uint32_t)
3506iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3507 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3508{
3509 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3510 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3511 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3513 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3514 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3515}
3516
3517
3518/** Recompiler debugging: Flush guest register shadow copies. */
3519#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3520
3521
3522
3523/*********************************************************************************************************************************
3524* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3529 */
3530DECL_INLINE_THROW(uint32_t)
3531iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3532 uintptr_t pfnAImpl, uint8_t cArgs)
3533{
3534 if (idxVarRc != UINT8_MAX)
3535 {
3536 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3537 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3538 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3539 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3540 }
3541
3542 /*
3543 * Do all the call setup and cleanup.
3544 *
3545     * It is only required to flush pending guest register writes in call-volatile registers, as
3546     * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
3547     * access their parameters. The flushing of call-volatile registers is always done by
3548     * iemNativeEmitCallCommon() no matter what the fFlushPendingWrites parameter says.
3549 */
3550 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3551
3552 /*
3553 * Make the call and update the return code variable if we've got one.
3554 */
3555 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3556 if (idxVarRc != UINT8_MAX)
3557 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3558
3559 return off;
3560}
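

/*
 * For reference, a call emitted by the worker above amounts to invoking the
 * helper directly with the (up to four) argument variables, e.g. roughly
 * "uRet = pfnAImpl(a0, a1, ...)" (illustrative; actual AIMPL prototypes vary
 * per instruction).  When a return variable is supplied it is not copied:
 * iemNativeVarRegisterSet() simply binds it to IEMNATIVE_CALL_RET_GREG.
 */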
3561
3562
3563
3564#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3565 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3566
3567#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3568 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3569
3570/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3571DECL_INLINE_THROW(uint32_t)
3572iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3573{
3574 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3575}
3576
3577
3578#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3579 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3580
3581#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3582 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3583
3584/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3585DECL_INLINE_THROW(uint32_t)
3586iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3587{
3588 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3589 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3590}
3591
3592
3593#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3594 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3595
3596#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3597 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3598
3599/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3600DECL_INLINE_THROW(uint32_t)
3601iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3602 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3603{
3604 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3605 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3606 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3607}
3608
3609
3610#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3611 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3612
3613#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3614 IEM_MC_LOCAL(a_rcType, a_rc); \
3615 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3616
3617/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3618DECL_INLINE_THROW(uint32_t)
3619iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3620 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3621{
3622 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3623 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3624 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3625 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3626}
3627
3628
3629#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3630 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3631
3632#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3633 IEM_MC_LOCAL(a_rcType, a_rc); \
3634 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3635
3636/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3637DECL_INLINE_THROW(uint32_t)
3638iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3639 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3640{
3641 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3642 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3643 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3644 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3645 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3646}
3647
3648
3649
3650/*********************************************************************************************************************************
3651* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3652*********************************************************************************************************************************/
3653
3654#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3655 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3656
3657#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3658 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3659
3660#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3661 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3662
3663#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3664 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3665
3666
3667/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3668 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3669DECL_INLINE_THROW(uint32_t)
3670iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3671{
3672 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3673 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3674 Assert(iGRegEx < 20);
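    /* Note: iGRegEx values 16 thru 19 select the high byte registers (AH, CH, DH, BH);
       the lower four bits give the index of the underlying GPR, hence the & 15 masking below. */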
3675
3676 /* Same discussion as in iemNativeEmitFetchGregU16 */
3677 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3678 kIemNativeGstRegUse_ReadOnly);
3679
3680 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3681 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3682
3683 /* The value is zero-extended to the full 64-bit host register width. */
3684 if (iGRegEx < 16)
3685 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3686 else
3687 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3688
3689 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3690 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3691 return off;
3692}
3693
3694
3695#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3696 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3697
3698#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3699 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3700
3701#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3702 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3703
3704/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3705DECL_INLINE_THROW(uint32_t)
3706iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3707{
3708 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3710 Assert(iGRegEx < 20);
3711
3712 /* Same discussion as in iemNativeEmitFetchGregU16 */
3713 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3714 kIemNativeGstRegUse_ReadOnly);
3715
3716 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3717 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3718
3719 if (iGRegEx < 16)
3720 {
3721 switch (cbSignExtended)
3722 {
3723 case sizeof(uint16_t):
3724 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3725 break;
3726 case sizeof(uint32_t):
3727 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3728 break;
3729 case sizeof(uint64_t):
3730 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3731 break;
3732 default: AssertFailed(); break;
3733 }
3734 }
3735 else
3736 {
3737 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3738 switch (cbSignExtended)
3739 {
3740 case sizeof(uint16_t):
3741 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3742 break;
3743 case sizeof(uint32_t):
3744 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3745 break;
3746 case sizeof(uint64_t):
3747 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3748 break;
3749 default: AssertFailed(); break;
3750 }
3751 }
3752
3753 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3754 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3755 return off;
3756}
3757
3758
3759
3760#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3761 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3762
3763#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3764 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3765
3766#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3767 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3768
3769/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3770DECL_INLINE_THROW(uint32_t)
3771iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3772{
3773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3775 Assert(iGReg < 16);
3776
3777 /*
3778 * We can either just load the low 16-bit of the GPR into a host register
3779 * for the variable, or we can do so via a shadow copy host register. The
3780 * latter will avoid having to reload it if it's being stored later, but
3781 * will waste a host register if it isn't touched again. Since we don't
3782 * know what going to happen, we choose the latter for now.
3783 */
3784 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3785 kIemNativeGstRegUse_ReadOnly);
3786
3787 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3788 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3789 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3790 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3791
3792 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3793 return off;
3794}
3795
3796
3797#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3798 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3799
3800#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3801 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3802
3803/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3804DECL_INLINE_THROW(uint32_t)
3805iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3806{
3807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3808 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3809 Assert(iGReg < 16);
3810
3811 /*
3812 * We can either just load the low 16-bit of the GPR into a host register
3813 * for the variable, or we can do so via a shadow copy host register. The
3814 * latter will avoid having to reload it if it's being stored later, but
3815 * will waste a host register if it isn't touched again. Since we don't
3816 * know what going to happen, we choose the latter for now.
3817 */
3818 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3819 kIemNativeGstRegUse_ReadOnly);
3820
3821 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3822 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3823 if (cbSignExtended == sizeof(uint32_t))
3824 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3825 else
3826 {
3827 Assert(cbSignExtended == sizeof(uint64_t));
3828 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3829 }
3830 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3831
3832 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3833 return off;
3834}
3835
3836
3837#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3838 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3839
3840#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3841 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3842
3843/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3844DECL_INLINE_THROW(uint32_t)
3845iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3846{
3847 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3848 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3849 Assert(iGReg < 16);
3850
3851 /*
3852 * We can either just load the low 32-bit of the GPR into a host register
3853 * for the variable, or we can do so via a shadow copy host register. The
3854 * latter will avoid having to reload it if it's being stored later, but
3855 * will waste a host register if it isn't touched again. Since we don't
3856 * know what going to happen, we choose the latter for now.
3857 */
3858 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3859 kIemNativeGstRegUse_ReadOnly);
3860
3861 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3862 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3863 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3864 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3865
3866 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3867 return off;
3868}
3869
3870
3871#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3872 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3873
3874/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3875DECL_INLINE_THROW(uint32_t)
3876iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3877{
3878 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3879 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3880 Assert(iGReg < 16);
3881
3882 /*
3883 * We can either just load the low 32-bit of the GPR into a host register
3884 * for the variable, or we can do so via a shadow copy host register. The
3885 * latter will avoid having to reload it if it's being stored later, but
3886 * will waste a host register if it isn't touched again. Since we don't
3887 * know what going to happen, we choose the latter for now.
3888 */
3889 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3890 kIemNativeGstRegUse_ReadOnly);
3891
3892 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3893 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3894 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3895 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3896
3897 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3898 return off;
3899}
3900
3901
3902#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3903 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3904
3905#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3906 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3907
3908/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3909 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3910DECL_INLINE_THROW(uint32_t)
3911iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3912{
3913 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3914 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3915 Assert(iGReg < 16);
3916
3917 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3918 kIemNativeGstRegUse_ReadOnly);
3919
3920 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3921 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3922 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3923 /** @todo name the register a shadow one already? */
3924 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3925
3926 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3927 return off;
3928}
3929
3930
3931#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3932#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3933 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3934
3935/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3936DECL_INLINE_THROW(uint32_t)
3937iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3938{
3939 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3940 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3941 Assert(iGRegLo < 16 && iGRegHi < 16);
3942
3943 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3944 kIemNativeGstRegUse_ReadOnly);
3945 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3946 kIemNativeGstRegUse_ReadOnly);
3947
3948 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3949 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
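    /* Copy the low guest GPR into qword 0 of the 128-bit destination and the high one into qword 1. */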
3950 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3951 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3952
3953 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3954 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3955 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3956 return off;
3957}
3958#endif
3959
3960
3961/*********************************************************************************************************************************
3962* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3963*********************************************************************************************************************************/
3964
3965#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3966 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3967
3968/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3969DECL_INLINE_THROW(uint32_t)
3970iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3971{
3972 Assert(iGRegEx < 20);
3973 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3974 kIemNativeGstRegUse_ForUpdate);
3975#ifdef RT_ARCH_AMD64
3976 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3977
3978 /* To the lowest byte of the register: mov r8, imm8 */
3979 if (iGRegEx < 16)
3980 {
3981 if (idxGstTmpReg >= 8)
3982 pbCodeBuf[off++] = X86_OP_REX_B;
3983 else if (idxGstTmpReg >= 4)
3984 pbCodeBuf[off++] = X86_OP_REX;
3985 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3986 pbCodeBuf[off++] = u8Value;
3987 }
3988 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
3989 else if (idxGstTmpReg < 4)
3990 {
3991 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
3992 pbCodeBuf[off++] = u8Value;
3993 }
3994 else
3995 {
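        /* Illustrative example: with the guest value shadowed in, say, r9, this emits
           'ror r9, 8; mov r9b, imm8; rol r9, 8', so only bits 15:8 end up modified. */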
3996 /* ror reg64, 8 */
3997 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3998 pbCodeBuf[off++] = 0xc1;
3999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4000 pbCodeBuf[off++] = 8;
4001
4002 /* mov reg8, imm8 */
4003 if (idxGstTmpReg >= 8)
4004 pbCodeBuf[off++] = X86_OP_REX_B;
4005 else if (idxGstTmpReg >= 4)
4006 pbCodeBuf[off++] = X86_OP_REX;
4007 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4008 pbCodeBuf[off++] = u8Value;
4009
4010 /* rol reg64, 8 */
4011 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4012 pbCodeBuf[off++] = 0xc1;
4013 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4014 pbCodeBuf[off++] = 8;
4015 }
4016
4017#elif defined(RT_ARCH_ARM64)
4018 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4019 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4020 if (iGRegEx < 16)
4021 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4022 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4023 else
4024 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4025 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4026 iemNativeRegFreeTmp(pReNative, idxImmReg);
4027
4028#else
4029# error "Port me!"
4030#endif
4031
4032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4033
4034#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4035 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4036#endif
4037
4038 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4039 return off;
4040}
4041
4042
4043#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4044 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4045
4046/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4047DECL_INLINE_THROW(uint32_t)
4048iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4049{
4050 Assert(iGRegEx < 20);
4051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4052
4053 /*
4054 * If it's a constant value (unlikely) we treat this as a
4055 * IEM_MC_STORE_GREG_U8_CONST statement.
4056 */
4057 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4058 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4059 { /* likely */ }
4060 else
4061 {
4062 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4063 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4064 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4065 }
4066
4067 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4068 kIemNativeGstRegUse_ForUpdate);
4069 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4070
4071#ifdef RT_ARCH_AMD64
4072 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4073 if (iGRegEx < 16)
4074 {
4075 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4076 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4077 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4078 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4079 pbCodeBuf[off++] = X86_OP_REX;
4080 pbCodeBuf[off++] = 0x8a;
4081 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4082 }
4083 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4084 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4085 {
4086 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4087 pbCodeBuf[off++] = 0x8a;
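        /* The +4 in the ModRM reg field selects AH/CH/DH/BH; this is only valid here because
           no REX prefix is emitted in this branch (both register indexes are below 4). */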
4088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4089 }
4090 else
4091 {
4092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4093
4094 /* ror reg64, 8 */
4095 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4096 pbCodeBuf[off++] = 0xc1;
4097 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4098 pbCodeBuf[off++] = 8;
4099
4100 /* mov reg8, reg8(r/m) */
4101 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4102 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4103 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4104 pbCodeBuf[off++] = X86_OP_REX;
4105 pbCodeBuf[off++] = 0x8a;
4106 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4107
4108 /* rol reg64, 8 */
4109 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4110 pbCodeBuf[off++] = 0xc1;
4111 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4112 pbCodeBuf[off++] = 8;
4113 }
4114
4115#elif defined(RT_ARCH_ARM64)
4116 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4117 or
4118 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4119 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4120 if (iGRegEx < 16)
4121 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4122 else
4123 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4124
4125#else
4126# error "Port me!"
4127#endif
4128 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4129
4130 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4131
4132#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4133 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4134#endif
4135 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4136 return off;
4137}
4138
4139
4140
4141#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4142 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4143
4144/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4145DECL_INLINE_THROW(uint32_t)
4146iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4147{
4148 Assert(iGReg < 16);
4149 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4150 kIemNativeGstRegUse_ForUpdate);
4151#ifdef RT_ARCH_AMD64
4152 /* mov reg16, imm16 */
4153 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4154 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4155 if (idxGstTmpReg >= 8)
4156 pbCodeBuf[off++] = X86_OP_REX_B;
4157 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4158 pbCodeBuf[off++] = RT_BYTE1(uValue);
4159 pbCodeBuf[off++] = RT_BYTE2(uValue);
4160
4161#elif defined(RT_ARCH_ARM64)
4162 /* movk xdst, #uValue, lsl #0 */
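    /* MOVK replaces only bits 15:0 and leaves the rest of the destination untouched,
       which matches the 16-bit GPR store semantics needed here. */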
4163 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4164 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4165
4166#else
4167# error "Port me!"
4168#endif
4169
4170 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4171
4172#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4173 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4174#endif
4175 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4176 return off;
4177}
4178
4179
4180#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4181 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4182
4183/** Emits code for IEM_MC_STORE_GREG_U16. */
4184DECL_INLINE_THROW(uint32_t)
4185iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4186{
4187 Assert(iGReg < 16);
4188 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4189
4190 /*
4191 * If it's a constant value (unlikely) we treat this as a
4192 * IEM_MC_STORE_GREG_U16_CONST statement.
4193 */
4194 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4195 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4196 { /* likely */ }
4197 else
4198 {
4199 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4200 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4201 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4202 }
4203
4204 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4205 kIemNativeGstRegUse_ForUpdate);
4206
4207#ifdef RT_ARCH_AMD64
4208 /* mov reg16, reg16 or [mem16] */
4209 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4210 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4211 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4212 {
4213 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4214 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4215 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4216 pbCodeBuf[off++] = 0x8b;
4217 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4218 }
4219 else
4220 {
4221 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4222 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4223 if (idxGstTmpReg >= 8)
4224 pbCodeBuf[off++] = X86_OP_REX_R;
4225 pbCodeBuf[off++] = 0x8b;
4226 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4227 }
4228
4229#elif defined(RT_ARCH_ARM64)
4230 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4231 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4232 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4233 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4234 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4235
4236#else
4237# error "Port me!"
4238#endif
4239
4240 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4241
4242#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4243 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4244#endif
4245 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4246 return off;
4247}
4248
4249
4250#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4251 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4252
4253/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4254DECL_INLINE_THROW(uint32_t)
4255iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4256{
4257 Assert(iGReg < 16);
4258 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4259 kIemNativeGstRegUse_ForFullWrite);
4260 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4261#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4262 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4263#endif
4264 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4265 return off;
4266}
4267
4268
4269#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4270 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4271
4272/** Emits code for IEM_MC_STORE_GREG_U32. */
4273DECL_INLINE_THROW(uint32_t)
4274iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4275{
4276 Assert(iGReg < 16);
4277 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4278
4279 /*
4280 * If it's a constant value (unlikely) we treat this as a
4281 * IEM_MC_STORE_GREG_U32_CONST statement.
4282 */
4283 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4284 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4285 { /* likely */ }
4286 else
4287 {
4288 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4289 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4290 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4291 }
4292
4293 /*
4294 * For the rest we allocate a guest register for the variable and write
4295 * it to the CPUMCTX structure.
4296 */
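    /* This relies on the 32-bit variable being kept zero-extended in its host register so the
       full-width shadow copy is architecturally correct; the VBOX_STRICT check below verifies
       that the upper 32 bits are indeed clear. */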
4297 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4298#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4299 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4300#else
4301 RT_NOREF(idxVarReg);
4302#endif
4303#ifdef VBOX_STRICT
4304 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4305#endif
4306 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4307 return off;
4308}
4309
4310
4311#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4312 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4313
4314/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4315DECL_INLINE_THROW(uint32_t)
4316iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4317{
4318 Assert(iGReg < 16);
4319 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4320 kIemNativeGstRegUse_ForFullWrite);
4321 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4322#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4323 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4324#endif
4325 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4326 return off;
4327}
4328
4329
4330#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4331 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4332
4333#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4334 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4335
4336/** Emits code for IEM_MC_STORE_GREG_U64 (and IEM_MC_STORE_GREG_I64). */
4337DECL_INLINE_THROW(uint32_t)
4338iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4339{
4340 Assert(iGReg < 16);
4341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4342
4343 /*
4344 * If it's a constant value (unlikely) we treat this as a
4345 * IEM_MC_STORE_GREG_U64_CONST statement.
4346 */
4347 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4348 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4349 { /* likely */ }
4350 else
4351 {
4352 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4353 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4354 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4355 }
4356
4357 /*
4358 * For the rest we allocate a guest register for the variable and write
4359 * it to the CPUMCTX structure.
4360 */
4361 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4362#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4363 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4364#else
4365 RT_NOREF(idxVarReg);
4366#endif
4367 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4368 return off;
4369}
4370
4371
4372#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4373 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4374
4375/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4376DECL_INLINE_THROW(uint32_t)
4377iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4378{
4379 Assert(iGReg < 16);
4380 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4381 kIemNativeGstRegUse_ForUpdate);
4382 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4383#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4384 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4385#endif
4386 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4387 return off;
4388}
4389
4390
4391#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4392#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4393 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4394
4395/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4396DECL_INLINE_THROW(uint32_t)
4397iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4398{
4399 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4400 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4401 Assert(iGRegLo < 16 && iGRegHi < 16);
4402
4403 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4404 kIemNativeGstRegUse_ForFullWrite);
4405 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4406 kIemNativeGstRegUse_ForFullWrite);
4407
4408 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4409 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4410 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4411 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4412
4413 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4414 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4415 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4416 return off;
4417}
4418#endif
4419
4420
4421/*********************************************************************************************************************************
4422* General purpose register manipulation (add, sub). *
4423*********************************************************************************************************************************/
4424
4425#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
4426 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
4427
4428/** Emits code for IEM_MC_ADD_GREG_U16. */
4429DECL_INLINE_THROW(uint32_t)
4430iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4431{
4432 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4433 kIemNativeGstRegUse_ForUpdate);
4434
4435#ifdef RT_ARCH_AMD64
4436 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4437 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4438 if (idxGstTmpReg >= 8)
4439 pbCodeBuf[off++] = X86_OP_REX_B;
4440 if (uAddend == 1)
4441 {
4442 pbCodeBuf[off++] = 0xff; /* inc */
4443 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4444 }
4445 else
4446 {
4447 pbCodeBuf[off++] = 0x81;
4448 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4449 pbCodeBuf[off++] = uAddend;
4450 pbCodeBuf[off++] = 0;
4451 }
4452
4453#else
4454 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4455 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4456
4457 /* add tmp, gstgrp, uAddend */
4458 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4459
4460 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg bits 15:0. */
4461 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4462
4463 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4464#endif
4465
4466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4467
4468#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4469 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4470#endif
4471
4472 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4473 return off;
4474}
4475
4476
4477#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4478 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4479
4480#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4481 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4482
4483/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4484DECL_INLINE_THROW(uint32_t)
4485iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4486{
4487 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4488 kIemNativeGstRegUse_ForUpdate);
4489
4490#ifdef RT_ARCH_AMD64
4491 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4492 if (f64Bit)
4493 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4494 else if (idxGstTmpReg >= 8)
4495 pbCodeBuf[off++] = X86_OP_REX_B;
4496 if (uAddend == 1)
4497 {
4498 pbCodeBuf[off++] = 0xff; /* inc */
4499 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4500 }
4501 else if (uAddend < 128)
4502 {
4503 pbCodeBuf[off++] = 0x83; /* add */
4504 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4505 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4506 }
4507 else
4508 {
4509 pbCodeBuf[off++] = 0x81; /* add */
4510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4511 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4512 pbCodeBuf[off++] = 0;
4513 pbCodeBuf[off++] = 0;
4514 pbCodeBuf[off++] = 0;
4515 }
4516
4517#else
4518 /* add gstgrp, gstgrp, uAddend */
4519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4520 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4521
4522#endif
4523
4524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4525
4526#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4527 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4528#endif
4529
4530 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4531 return off;
4532}
4533
4534
4535
4536#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4537 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4538
4539/** Emits code for IEM_MC_SUB_GREG_U16. */
4540DECL_INLINE_THROW(uint32_t)
4541iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4542{
4543 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4544 kIemNativeGstRegUse_ForUpdate);
4545
4546#ifdef RT_ARCH_AMD64
4547 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4548 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4549 if (idxGstTmpReg >= 8)
4550 pbCodeBuf[off++] = X86_OP_REX_B;
4551 if (uSubtrahend == 1)
4552 {
4553 pbCodeBuf[off++] = 0xff; /* dec */
4554 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4555 }
4556 else
4557 {
4558 pbCodeBuf[off++] = 0x81;
4559 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4560 pbCodeBuf[off++] = uSubtrahend;
4561 pbCodeBuf[off++] = 0;
4562 }
4563
4564#else
4565 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4566 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4567
4568 /* sub tmp, gstgrp, uSubtrahend */
4569 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4570
4571 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4572 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4573
4574 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4575#endif
4576
4577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4578
4579#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4580 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4581#endif
4582
4583 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4584 return off;
4585}
4586
4587
4588#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4589 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4590
4591#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4592 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4593
4594/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4595DECL_INLINE_THROW(uint32_t)
4596iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4597{
4598 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4599 kIemNativeGstRegUse_ForUpdate);
4600
4601#ifdef RT_ARCH_AMD64
4602 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4603 if (f64Bit)
4604 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4605 else if (idxGstTmpReg >= 8)
4606 pbCodeBuf[off++] = X86_OP_REX_B;
4607 if (uSubtrahend == 1)
4608 {
4609 pbCodeBuf[off++] = 0xff; /* dec */
4610 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4611 }
4612 else if (uSubtrahend < 128)
4613 {
4614 pbCodeBuf[off++] = 0x83; /* sub */
4615 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4616 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4617 }
4618 else
4619 {
4620 pbCodeBuf[off++] = 0x81; /* sub */
4621 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4622 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4623 pbCodeBuf[off++] = 0;
4624 pbCodeBuf[off++] = 0;
4625 pbCodeBuf[off++] = 0;
4626 }
4627
4628#else
4629 /* sub gstgrp, gstgrp, uSubtrahend */
4630 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4631 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4632
4633#endif
4634
4635 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4636
4637#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4638 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4639#endif
4640
4641 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4642 return off;
4643}
4644
4645
4646#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4647 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4648
4649#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4650 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4651
4652#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4653 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4654
4655#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4656 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4657
4658/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4659DECL_INLINE_THROW(uint32_t)
4660iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4661{
4662#ifdef VBOX_STRICT
4663 switch (cbMask)
4664 {
4665 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4666 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4667 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4668 case sizeof(uint64_t): break;
4669 default: AssertFailedBreak();
4670 }
4671#endif
4672
4673 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4674 kIemNativeGstRegUse_ForUpdate);
4675
4676 switch (cbMask)
4677 {
4678 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4679 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4680 break;
4681 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4682 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4683 break;
4684 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4685 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4686 break;
4687 case sizeof(uint64_t):
4688 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4689 break;
4690 default: AssertFailedBreak();
4691 }
4692
4693 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4694
4695#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4696 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4697#endif
4698
4699 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4700 return off;
4701}
4702
4703
4704#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4705 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4706
4707#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4708 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4709
4710#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4711 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4712
4713#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4714 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4715
4716/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4717DECL_INLINE_THROW(uint32_t)
4718iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4719{
4720#ifdef VBOX_STRICT
4721 switch (cbMask)
4722 {
4723 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4724 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4725 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4726 case sizeof(uint64_t): break;
4727 default: AssertFailedBreak();
4728 }
4729#endif
4730
4731 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4732 kIemNativeGstRegUse_ForUpdate);
4733
4734 switch (cbMask)
4735 {
4736 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4737 case sizeof(uint16_t):
4738 case sizeof(uint64_t):
4739 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4740 break;
4741 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4742 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4743 break;
4744 default: AssertFailedBreak();
4745 }
4746
4747 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4748
4749#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4750 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4751#endif
4752
4753 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4754 return off;
4755}
4756
4757
4758/*********************************************************************************************************************************
4759* Local/Argument variable manipulation (add, sub, and, or). *
4760*********************************************************************************************************************************/
4761
4762#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4763 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4764
4765#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4766 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4767
4768#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4769 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4770
4771#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4772 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4773
4774
4775#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4776 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4777
4778#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4779 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4780
4781#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4782 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4783
4784/** Emits code for AND'ing a local and a constant value. */
4785DECL_INLINE_THROW(uint32_t)
4786iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4787{
4788#ifdef VBOX_STRICT
4789 switch (cbMask)
4790 {
4791 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4792 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4793 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4794 case sizeof(uint64_t): break;
4795 default: AssertFailedBreak();
4796 }
4797#endif
4798
4799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4800 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4801
4802 if (cbMask <= sizeof(uint32_t))
4803 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4804 else
4805 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4806
4807 iemNativeVarRegisterRelease(pReNative, idxVar);
4808 return off;
4809}
4810
4811
4812#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4813 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4814
4815#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4816 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4817
4818#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4819 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4820
4821#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4822 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4823
4824/** Emits code for OR'ing a local and a constant value. */
4825DECL_INLINE_THROW(uint32_t)
4826iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4827{
4828#ifdef VBOX_STRICT
4829 switch (cbMask)
4830 {
4831 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4832 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4833 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4834 case sizeof(uint64_t): break;
4835 default: AssertFailedBreak();
4836 }
4837#endif
4838
4839 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4840 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4841
4842 if (cbMask <= sizeof(uint32_t))
4843 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4844 else
4845 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4846
4847 iemNativeVarRegisterRelease(pReNative, idxVar);
4848 return off;
4849}
4850
4851
4852#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4853 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4854
4855#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4856 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4857
4858#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4859 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4860
4861/** Emits code for reversing the byte order in a local value. */
4862DECL_INLINE_THROW(uint32_t)
4863iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4864{
4865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4866 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4867
4868 switch (cbLocal)
4869 {
4870 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4871 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4872 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4873 default: AssertFailedBreak();
4874 }
4875
4876 iemNativeVarRegisterRelease(pReNative, idxVar);
4877 return off;
4878}
4879
4880
4881#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4882 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4883
4884#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4885 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4886
4887#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4888 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4889
4890/** Emits code for shifting left a local value. */
4891DECL_INLINE_THROW(uint32_t)
4892iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4893{
4894#ifdef VBOX_STRICT
4895 switch (cbLocal)
4896 {
4897 case sizeof(uint8_t): Assert(cShift < 8); break;
4898 case sizeof(uint16_t): Assert(cShift < 16); break;
4899 case sizeof(uint32_t): Assert(cShift < 32); break;
4900 case sizeof(uint64_t): Assert(cShift < 64); break;
4901 default: AssertFailedBreak();
4902 }
4903#endif
4904
4905 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4906 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4907
4908 if (cbLocal <= sizeof(uint32_t))
4909 {
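        /* Shift as a 32-bit operation, then mask off anything shifted beyond the width of an
           8- or 16-bit local so the value stays zero-extended in the host register. */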
4910 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4911 if (cbLocal < sizeof(uint32_t))
4912 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4913 cbLocal == sizeof(uint16_t)
4914 ? UINT32_C(0xffff)
4915 : UINT32_C(0xff));
4916 }
4917 else
4918 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4919
4920 iemNativeVarRegisterRelease(pReNative, idxVar);
4921 return off;
4922}
4923
4924
4925#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4926 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4927
4928#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4929 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4930
4931#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4932 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4933
4934/** Emits code for arithmetically shifting a local value right. */
4935DECL_INLINE_THROW(uint32_t)
4936iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4937{
4938#ifdef VBOX_STRICT
4939 switch (cbLocal)
4940 {
4941 case sizeof(int8_t): Assert(cShift < 8); break;
4942 case sizeof(int16_t): Assert(cShift < 16); break;
4943 case sizeof(int32_t): Assert(cShift < 32); break;
4944 case sizeof(int64_t): Assert(cShift < 64); break;
4945 default: AssertFailedBreak();
4946 }
4947#endif
4948
4949 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4950 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4951
4952 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
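    /* E.g. an 8-bit local holding 0x80 (-128): without sign extension, a 32-bit arithmetic
       shift right by 1 would yield 0x40, whereas the correct 8-bit result is 0xC0 (-64). */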
4953 if (cbLocal == sizeof(uint8_t))
4954 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4955 else if (cbLocal == sizeof(uint16_t))
4956 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4957
4958 if (cbLocal <= sizeof(uint32_t))
4959 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4960 else
4961 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4962
4963 iemNativeVarRegisterRelease(pReNative, idxVar);
4964 return off;
4965}
4966
4967
4968#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4969 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4970
4971#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4972 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4973
4974#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4975 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4976
4977/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4978DECL_INLINE_THROW(uint32_t)
4979iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4980{
4981 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4982 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4983 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4984 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4985
4986 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4987 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4988
4989 /* Need to sign extend the value. */
4990 if (cbLocal <= sizeof(uint32_t))
4991 {
4992/** @todo ARM64: In case of boredom, the extended register form of ADD can do the
4993 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
4994 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4995
4996 switch (cbLocal)
4997 {
4998 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
4999 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5000 default: AssertFailed();
5001 }
5002
5003 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5004 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5005 }
5006 else
5007 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5008
5009 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5010 iemNativeVarRegisterRelease(pReNative, idxVar);
5011 return off;
5012}
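/*
 * A minimal standalone sketch of what the code emitted by iemNativeEmitAddLocalToEffAddr
 * computes for a 16-bit local (hypothetical helper, not part of the recompiler): the
 * local value is sign extended to 64 bits and added to the effective address.
 */
static uint64_t iemNativeSketchAddLocalS16ToEffAddr(uint64_t uEffAddr, int16_t i16Local)
{
    return uEffAddr + (uint64_t)(int64_t)i16Local; /* sign extend, then plain 64-bit add */
}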
5013
5014
5015
5016/*********************************************************************************************************************************
5017* EFLAGS *
5018*********************************************************************************************************************************/
5019
5020#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5021# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5022#else
5023# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5024 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5025
5026DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5027{
5028 if (fEflOutput)
5029 {
5030 PVMCPUCC const pVCpu = pReNative->pVCpu;
5031# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5032 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5033 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5034 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5035# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5036 if (fEflOutput & (a_fEfl)) \
5037 { \
5038 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5039 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5040 else \
5041 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5042 } else do { } while (0)
5043# else
5044 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5045 IEMLIVENESSBIT const LivenessClobbered =
5046 {
5047 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5048 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5049 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5050 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5051 };
5052 IEMLIVENESSBIT const LivenessDelayable =
5053 {
5054 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5055 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5056 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5057 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5058 };
5059# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5060 if (fEflOutput & (a_fEfl)) \
5061 { \
5062 if (LivenessClobbered.a_fLivenessMember) \
5063 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5064 else if (LivenessDelayable.a_fLivenessMember) \
5065 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5066 else \
5067 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5068 } else do { } while (0)
5069# endif
5070 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5071 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5072 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5073 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5074 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5075 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5076 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5077# undef CHECK_FLAG_AND_UPDATE_STATS
5078 }
5079 RT_NOREF(fEflInput);
5080}
5081#endif /* !VBOX_WITH_STATISTICS || !IEMNATIVE_WITH_LIVENESS_ANALYSIS */
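/*
 * A minimal standalone sketch of the liveness bitmap arithmetic used in the extended
 * layout above (hypothetical helper, not part of the recompiler): a flag counts as
 * skippable when it is written without being read or otherwise needed, and as delayable
 * when the only additional interest comes from a potential exception/call path.
 */
static void iemNativeSketchLivenessMasks(uint64_t bmWrite, uint64_t bmRead, uint64_t bmPotXcptOrCall,
                                         uint64_t bmOther, uint64_t *pbmClobbered, uint64_t *pbmDelayable)
{
    *pbmClobbered = bmWrite & ~(bmRead | bmPotXcptOrCall | bmOther);    /* counted as Skippable */
    *pbmDelayable = bmWrite & bmPotXcptOrCall & ~(bmRead | bmOther);    /* counted as Delayable */
}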
5082
5083#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5084#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5085 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5086
5087/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5088DECL_INLINE_THROW(uint32_t)
5089iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5090 uint32_t fEflInput, uint32_t fEflOutput)
5091{
5092 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5093 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5094 RT_NOREF(fEflInput, fEflOutput);
5095
5096#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5097# ifdef VBOX_STRICT
5098 if ( pReNative->idxCurCall != 0
5099 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5100 {
5101 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5102 uint32_t const fBoth = fEflInput | fEflOutput;
5103# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5104 AssertMsg( !(fBoth & (a_fElfConst)) \
5105 || (!(fEflInput & (a_fElfConst)) \
5106 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5107 : !(fEflOutput & (a_fElfConst)) \
5108 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5109 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5110 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5111 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5112 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5113 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5114 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5115 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5116 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5117 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5118# undef ASSERT_ONE_EFL
5119 }
5120# endif
5121#endif
5122
5123 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5124
5125 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5126 * the existing shadow copy. */
5127 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5128 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5129 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5130 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5131 return off;
5132}
5133
5134
5135
5136/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5137 * start using it with custom native code emission (inlining assembly
5138 * instruction helpers). */
5139#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5140#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5141 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5142 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5143
5144#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5145#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5146 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5147 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5148
5149/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5150DECL_INLINE_THROW(uint32_t)
5151iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5152 bool fUpdateSkipping)
5153{
5154 RT_NOREF(fEflOutput);
5155 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5156 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5157
5158#ifdef VBOX_STRICT
5159 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5160 uint32_t offFixup = off;
5161 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5162 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5163 iemNativeFixupFixedJump(pReNative, offFixup, off);
5164
5165 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5166 offFixup = off;
5167 off = iemNativeEmitJzToFixed(pReNative, off, off);
5168 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5169 iemNativeFixupFixedJump(pReNative, offFixup, off);
5170
5171 /** @todo validate that only bits in the fEflOutput mask changed. */
5172#endif
5173
5174#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5175 if (fUpdateSkipping)
5176 {
5177 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5178 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5179 else
5180 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5181 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5182 }
5183#else
5184 RT_NOREF_PV(fUpdateSkipping);
5185#endif
5186
5187 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5188 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5189 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5190 return off;
5191}
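/*
 * A minimal standalone sketch of the strict-build check emitted above (hypothetical
 * helper, not part of the recompiler): the committed value must have the must-be-one
 * EFLAGS bit set and all reserved must-be-zero bits within the hardware mask clear.
 */
static bool iemNativeSketchEFlagsLooksValid(uint32_t fEfl)
{
    return (fEfl & X86_EFL_RA1_MASK) == X86_EFL_RA1_MASK                /* brk 0x2001 otherwise */
        && (fEfl & X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32) == 0;   /* brk 0x2002 otherwise */
}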
5192
5193
5194typedef enum IEMNATIVEMITEFLOP
5195{
5196 kIemNativeEmitEflOp_Invalid = 0,
5197 kIemNativeEmitEflOp_Set,
5198 kIemNativeEmitEflOp_Clear,
5199 kIemNativeEmitEflOp_Flip
5200} IEMNATIVEMITEFLOP;
5201
5202#define IEM_MC_SET_EFL_BIT(a_fBit) \
5203 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5204
5205#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5206 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5207
5208#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5209 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5210
5211/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5212DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5213{
5214 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5215 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5216
5217 switch (enmOp)
5218 {
5219 case kIemNativeEmitEflOp_Set:
5220 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5221 break;
5222 case kIemNativeEmitEflOp_Clear:
5223 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5224 break;
5225 case kIemNativeEmitEflOp_Flip:
5226 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5227 break;
5228 default:
5229 AssertFailed();
5230 break;
5231 }
5232
5233 /** @todo No delayed writeback for EFLAGS right now. */
5234 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5235
5236 /* Free but don't flush the EFLAGS register. */
5237 iemNativeRegFreeTmp(pReNative, idxEflReg);
5238
5239 return off;
5240}
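/*
 * A minimal standalone sketch of the three EFLAGS bit operations emitted above
 * (hypothetical helper, not part of the recompiler), expressed directly on a
 * 32-bit flags value: OR sets the bit, AND with the inverted mask clears it,
 * XOR flips it.
 */
static uint32_t iemNativeSketchModifyEFlagsBit(uint32_t fEfl, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
{
    switch (enmOp)
    {
        case kIemNativeEmitEflOp_Set:   return fEfl | fEflBit;
        case kIemNativeEmitEflOp_Clear: return fEfl & ~fEflBit;
        case kIemNativeEmitEflOp_Flip:  return fEfl ^ fEflBit;
        default:                        return fEfl;
    }
}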
5241
5242
5243/*********************************************************************************************************************************
5244* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5245*********************************************************************************************************************************/
5246
5247#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5248 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5249
5250#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5251 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5252
5253#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5254 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5255
5256
5257/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5258 * IEM_MC_FETCH_SREG_ZX_U64. */
5259DECL_INLINE_THROW(uint32_t)
5260iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5261{
5262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5263 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5264 Assert(iSReg < X86_SREG_COUNT);
5265
5266 /*
5267 * For now, we will not create a shadow copy of a selector. The rationale
5268 * is that since we do not recompile the popping and loading of segment
5269 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
5270 * and moving to registers, there is only a small chance that the shadow
5271 * copy will be accessed again before the register is reloaded. One
5272 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5273 * the extra register pressure atm.
5274 *
5275 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5276 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
5277 * store scenario covered at present (r160730).
5278 */
5279 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5280 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5281 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5282 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5283 return off;
5284}
5285
5286
5287
5288/*********************************************************************************************************************************
5289* Register references. *
5290*********************************************************************************************************************************/
5291
5292#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5293 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5294
5295#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5296 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5297
5298/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5299DECL_INLINE_THROW(uint32_t)
5300iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5301{
5302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5303 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5304 Assert(iGRegEx < 20);
5305
5306 if (iGRegEx < 16)
5307 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5308 else
5309 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5310
5311 /* If we've delayed writing back the register value, flush it now. */
5312 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5313
5314 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5315 if (!fConst)
5316 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5317
5318 return off;
5319}
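/*
 * A minimal standalone sketch of the extended 8-bit register index convention used
 * above (hypothetical helper and GPR array layout, not part of the recompiler):
 * indices 0..15 reference the low byte of GPR 0..15, while 16..19 reference the
 * legacy high byte registers AH/CH/DH/BH, i.e. bits 8..15 of GPR 0..3.
 */
static uint8_t iemNativeSketchGetGReg8Ex(uint64_t const *pauGprs, uint8_t iGRegEx)
{
    if (iGRegEx < 16)
        return (uint8_t)pauGprs[iGRegEx];           /* AL..R15L: the low byte. */
    return (uint8_t)(pauGprs[iGRegEx & 15] >> 8);   /* AH..BH: the second byte of GPR 0..3. */
}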
5320
5321#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5322 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5323
5324#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5325 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5326
5327#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5328 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5329
5330#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5331 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5332
5333#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5334 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5335
5336#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5337 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5338
5339#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5340 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5341
5342#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5343 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5344
5345#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5346 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5347
5348#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5349 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5350
5351/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5352DECL_INLINE_THROW(uint32_t)
5353iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5354{
5355 Assert(iGReg < 16);
5356 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5357 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5358
5359 /* If we've delayed writing back the register value, flush it now. */
5360 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5361
5362 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5363 if (!fConst)
5364 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5365
5366 return off;
5367}
5368
5369
5370#undef IEM_MC_REF_EFLAGS /* should not be used. */
5371#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5372 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5373 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5374
5375/** Handles IEM_MC_REF_EFLAGS. */
5376DECL_INLINE_THROW(uint32_t)
5377iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5378{
5379 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5380 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5381
5382#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5383 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5384
5385 /* Updating the skipping according to the outputs is a little early, but
5386 we don't have any other hooks for references atm. */
5387 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5388 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5389 else if (fEflOutput & X86_EFL_STATUS_BITS)
5390 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5391 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5392#else
5393 RT_NOREF(fEflInput, fEflOutput);
5394#endif
5395
5396 /* If we've delayed writing back the register value, flush it now. */
5397 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5398
5399 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5400 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5401
5402 return off;
5403}
5404
5405
5406/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5407 * different code from the threaded recompiler, maybe it would be helpful. For now
5408 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5409#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5410
5411
5412#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5413 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5414
5415#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5416 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5417
5418#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5419 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5420
5421#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5422/* Just being paranoid here. */
5423# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5424AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5425AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5426AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5427AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5428# endif
5429AssertCompileMemberOffset(X86XMMREG, au64, 0);
5430AssertCompileMemberOffset(X86XMMREG, au32, 0);
5431AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5432AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5433
5434# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5435 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5436# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5437 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5438# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5439 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5440# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5441 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5442#endif
5443
5444/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5445DECL_INLINE_THROW(uint32_t)
5446iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5447{
5448 Assert(iXReg < 16);
5449 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5450 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5451
5452 /* If we've delayed writing back the register value, flush it now. */
5453 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5454
5455#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5456 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5457 if (!fConst)
5458 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5459#else
5460 RT_NOREF(fConst);
5461#endif
5462
5463 return off;
5464}
5465
5466
5467
5468/*********************************************************************************************************************************
5469* Effective Address Calculation *
5470*********************************************************************************************************************************/
5471#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5472 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5473
5474/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5475 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5476DECL_INLINE_THROW(uint32_t)
5477iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5478 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5479{
5480 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5481
5482 /*
5483 * Handle the disp16 form with no registers first.
5484 *
5485 * Convert to an immediate value, as that'll delay the register allocation
5486 * and assignment till the memory access / call / whatever and we can use
5487 * a more appropriate register (or none at all).
5488 */
5489 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5490 {
5491 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5492 return off;
5493 }
5494
5495 /* Determine the displacement. */
5496 uint16_t u16EffAddr;
5497 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5498 {
5499 case 0: u16EffAddr = 0; break;
5500 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5501 case 2: u16EffAddr = u16Disp; break;
5502 default: AssertFailedStmt(u16EffAddr = 0);
5503 }
5504
5505 /* Determine the registers involved. */
5506 uint8_t idxGstRegBase;
5507 uint8_t idxGstRegIndex;
5508 switch (bRm & X86_MODRM_RM_MASK)
5509 {
5510 case 0:
5511 idxGstRegBase = X86_GREG_xBX;
5512 idxGstRegIndex = X86_GREG_xSI;
5513 break;
5514 case 1:
5515 idxGstRegBase = X86_GREG_xBX;
5516 idxGstRegIndex = X86_GREG_xDI;
5517 break;
5518 case 2:
5519 idxGstRegBase = X86_GREG_xBP;
5520 idxGstRegIndex = X86_GREG_xSI;
5521 break;
5522 case 3:
5523 idxGstRegBase = X86_GREG_xBP;
5524 idxGstRegIndex = X86_GREG_xDI;
5525 break;
5526 case 4:
5527 idxGstRegBase = X86_GREG_xSI;
5528 idxGstRegIndex = UINT8_MAX;
5529 break;
5530 case 5:
5531 idxGstRegBase = X86_GREG_xDI;
5532 idxGstRegIndex = UINT8_MAX;
5533 break;
5534 case 6:
5535 idxGstRegBase = X86_GREG_xBP;
5536 idxGstRegIndex = UINT8_MAX;
5537 break;
5538#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5539 default:
5540#endif
5541 case 7:
5542 idxGstRegBase = X86_GREG_xBX;
5543 idxGstRegIndex = UINT8_MAX;
5544 break;
5545 }
5546
5547 /*
5548 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5549 */
5550 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5551 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5552 kIemNativeGstRegUse_ReadOnly);
5553 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5554 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5555 kIemNativeGstRegUse_ReadOnly)
5556 : UINT8_MAX;
5557#ifdef RT_ARCH_AMD64
5558 if (idxRegIndex == UINT8_MAX)
5559 {
5560 if (u16EffAddr == 0)
5561 {
5562 /* movxz ret, base */
5563 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5564 }
5565 else
5566 {
5567 /* lea ret32, [base64 + disp32] */
5568 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5569 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5570 if (idxRegRet >= 8 || idxRegBase >= 8)
5571 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5572 pbCodeBuf[off++] = 0x8d;
5573 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5574 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5575 else
5576 {
5577 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5578 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5579 }
5580 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5581 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5582 pbCodeBuf[off++] = 0;
5583 pbCodeBuf[off++] = 0;
5584 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5585
5586 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5587 }
5588 }
5589 else
5590 {
5591 /* lea ret32, [index64 + base64 (+ disp32)] */
5592 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5593 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5594 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5595 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5596 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5597 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5598 pbCodeBuf[off++] = 0x8d;
5599 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5600 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5601 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5602 if (bMod == X86_MOD_MEM4)
5603 {
5604 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5605 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5606 pbCodeBuf[off++] = 0;
5607 pbCodeBuf[off++] = 0;
5608 }
5609 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5610 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5611 }
5612
5613#elif defined(RT_ARCH_ARM64)
5614 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5615 if (u16EffAddr == 0)
5616 {
5617 if (idxRegIndex == UINT8_MAX)
5618 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5619 else
5620 {
5621 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5622 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5623 }
5624 }
5625 else
5626 {
5627 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5628 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5629 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5630 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5631 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5632 else
5633 {
5634 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5635 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5636 }
5637 if (idxRegIndex != UINT8_MAX)
5638 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5639 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5640 }
5641
5642#else
5643# error "port me"
5644#endif
5645
5646 if (idxRegIndex != UINT8_MAX)
5647 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5648 iemNativeRegFreeTmp(pReNative, idxRegBase);
5649 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5650 return off;
5651}
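/*
 * A minimal standalone sketch of the 16-bit effective address calculation the emitter
 * above generates code for (hypothetical helper and GPR array, not part of the
 * recompiler); pau16Gprs is assumed to hold the low 16 bits of the 16 general registers.
 */
static uint16_t iemNativeSketchCalcEffAddr16(uint8_t bRm, uint16_t u16Disp, uint16_t const *pau16Gprs)
{
    /* mod=0 && rm=6 is the register-less disp16 form. */
    if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
        return u16Disp;

    uint16_t uEffAddr;
    switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    {
        case 0:  uEffAddr = 0; break;
        case 1:  uEffAddr = (uint16_t)(int16_t)(int8_t)u16Disp; break; /* sign extended disp8 */
        case 2:  uEffAddr = u16Disp; break;                            /* disp16 */
        default: uEffAddr = 0; break;                                  /* mod=3 is the register form */
    }
    switch (bRm & X86_MODRM_RM_MASK)
    {
        case 0:  uEffAddr += pau16Gprs[X86_GREG_xBX] + pau16Gprs[X86_GREG_xSI]; break;
        case 1:  uEffAddr += pau16Gprs[X86_GREG_xBX] + pau16Gprs[X86_GREG_xDI]; break;
        case 2:  uEffAddr += pau16Gprs[X86_GREG_xBP] + pau16Gprs[X86_GREG_xSI]; break;
        case 3:  uEffAddr += pau16Gprs[X86_GREG_xBP] + pau16Gprs[X86_GREG_xDI]; break;
        case 4:  uEffAddr += pau16Gprs[X86_GREG_xSI]; break;
        case 5:  uEffAddr += pau16Gprs[X86_GREG_xDI]; break;
        case 6:  uEffAddr += pau16Gprs[X86_GREG_xBP]; break;
        default: uEffAddr += pau16Gprs[X86_GREG_xBX]; break;
    }
    return uEffAddr; /* The 16-bit wrap-around is implicit in the uint16_t arithmetic. */
}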
5652
5653
5654#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5655 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5656
5657/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5658 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5659DECL_INLINE_THROW(uint32_t)
5660iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5661 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5662{
5663 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5664
5665 /*
5666 * Handle the disp32 form with no registers first.
5667 *
5668 * Convert to an immediate value, as that'll delay the register allocation
5669 * and assignment till the memory access / call / whatever and we can use
5670 * a more appropriate register (or none at all).
5671 */
5672 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5673 {
5674 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5675 return off;
5676 }
5677
5678 /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
5679 uint32_t u32EffAddr = 0;
5680 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5681 {
5682 case 0: break;
5683 case 1: u32EffAddr = (int8_t)u32Disp; break;
5684 case 2: u32EffAddr = u32Disp; break;
5685 default: AssertFailed();
5686 }
5687
5688 /* Get the register (or SIB) value. */
5689 uint8_t idxGstRegBase = UINT8_MAX;
5690 uint8_t idxGstRegIndex = UINT8_MAX;
5691 uint8_t cShiftIndex = 0;
5692 switch (bRm & X86_MODRM_RM_MASK)
5693 {
5694 case 0: idxGstRegBase = X86_GREG_xAX; break;
5695 case 1: idxGstRegBase = X86_GREG_xCX; break;
5696 case 2: idxGstRegBase = X86_GREG_xDX; break;
5697 case 3: idxGstRegBase = X86_GREG_xBX; break;
5698 case 4: /* SIB */
5699 {
5700 /* index w/ scaling. */
5701 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5702 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5703 {
5704 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5705 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5706 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5707 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5708 case 4: cShiftIndex = 0; /*no index*/ break;
5709 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5710 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5711 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5712 }
5713
5714 /* base */
5715 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5716 {
5717 case 0: idxGstRegBase = X86_GREG_xAX; break;
5718 case 1: idxGstRegBase = X86_GREG_xCX; break;
5719 case 2: idxGstRegBase = X86_GREG_xDX; break;
5720 case 3: idxGstRegBase = X86_GREG_xBX; break;
5721 case 4:
5722 idxGstRegBase = X86_GREG_xSP;
5723 u32EffAddr += uSibAndRspOffset >> 8;
5724 break;
5725 case 5:
5726 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5727 idxGstRegBase = X86_GREG_xBP;
5728 else
5729 {
5730 Assert(u32EffAddr == 0);
5731 u32EffAddr = u32Disp;
5732 }
5733 break;
5734 case 6: idxGstRegBase = X86_GREG_xSI; break;
5735 case 7: idxGstRegBase = X86_GREG_xDI; break;
5736 }
5737 break;
5738 }
5739 case 5: idxGstRegBase = X86_GREG_xBP; break;
5740 case 6: idxGstRegBase = X86_GREG_xSI; break;
5741 case 7: idxGstRegBase = X86_GREG_xDI; break;
5742 }
5743
5744 /*
5745 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5746 * the start of the function.
5747 */
5748 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5749 {
5750 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5751 return off;
5752 }
5753
5754 /*
5755 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5756 */
5757 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5758 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5759 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5760 kIemNativeGstRegUse_ReadOnly);
5761 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5762 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5763 kIemNativeGstRegUse_ReadOnly);
5764
5765 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5766 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5767 {
5768 idxRegBase = idxRegIndex;
5769 idxRegIndex = UINT8_MAX;
5770 }
5771
5772#ifdef RT_ARCH_AMD64
5773 if (idxRegIndex == UINT8_MAX)
5774 {
5775 if (u32EffAddr == 0)
5776 {
5777 /* mov ret, base */
5778 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5779 }
5780 else
5781 {
5782 /* lea ret32, [base64 + disp32] */
5783 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5784 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5785 if (idxRegRet >= 8 || idxRegBase >= 8)
5786 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5787 pbCodeBuf[off++] = 0x8d;
5788 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5789 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5790 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5791 else
5792 {
5793 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5794 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5795 }
5796 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5797 if (bMod == X86_MOD_MEM4)
5798 {
5799 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5800 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5801 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5802 }
5803 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5804 }
5805 }
5806 else
5807 {
5808 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5809 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5810 if (idxRegBase == UINT8_MAX)
5811 {
5812 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5813 if (idxRegRet >= 8 || idxRegIndex >= 8)
5814 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5815 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5816 pbCodeBuf[off++] = 0x8d;
5817 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5818 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5819 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5820 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5821 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5822 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5823 }
5824 else
5825 {
5826 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5827 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5828 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5829 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5830 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5831 pbCodeBuf[off++] = 0x8d;
5832 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5833 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5834 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5835 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5836 if (bMod != X86_MOD_MEM0)
5837 {
5838 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5839 if (bMod == X86_MOD_MEM4)
5840 {
5841 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5842 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5843 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5844 }
5845 }
5846 }
5847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5848 }
5849
5850#elif defined(RT_ARCH_ARM64)
5851 if (u32EffAddr == 0)
5852 {
5853 if (idxRegIndex == UINT8_MAX)
5854 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5855 else if (idxRegBase == UINT8_MAX)
5856 {
5857 if (cShiftIndex == 0)
5858 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5859 else
5860 {
5861 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5862 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5863 }
5864 }
5865 else
5866 {
5867 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5868 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5869 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5870 }
5871 }
5872 else
5873 {
5874 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5875 {
5876 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5877 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5878 }
5879 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5880 {
5881 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5882 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5883 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5884 }
5885 else
5886 {
5887 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5888 if (idxRegBase != UINT8_MAX)
5889 {
5890 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5891 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5892 }
5893 }
5894 if (idxRegIndex != UINT8_MAX)
5895 {
5896 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5897 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5898 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5899 }
5900 }
5901
5902#else
5903# error "port me"
5904#endif
5905
5906 if (idxRegIndex != UINT8_MAX)
5907 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5908 if (idxRegBase != UINT8_MAX)
5909 iemNativeRegFreeTmp(pReNative, idxRegBase);
5910 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5911 return off;
5912}
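/*
 * A minimal standalone sketch of the SIB byte decoding done above (hypothetical helper
 * and GPR array, not part of the recompiler). The uSibAndRspOffset convention matches
 * the emitter: bits 0..7 hold the SIB byte and bits 8 and up hold the extra RSP/ESP
 * offset applied for the pop [esp] special case. bMod is the ModRM mod field (0..2).
 */
static uint32_t iemNativeSketchCalcSibAddr32(uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t bMod,
                                             uint32_t const *pau32Gprs)
{
    uint8_t const cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    uint8_t const iIndex      = (uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK;
    uint8_t const iBase       = uSibAndRspOffset & X86_SIB_BASE_MASK;
    uint32_t      uEffAddr    = bMod == 1 ? (uint32_t)(int8_t)u32Disp : bMod == 2 ? u32Disp : 0;

    if (iIndex != 4)                        /* index=4 means no index register. */
        uEffAddr += pau32Gprs[iIndex] << cShiftIndex;

    if (iBase == 4)                         /* base=esp: also add the pop [esp] adjustment. */
        uEffAddr += pau32Gprs[X86_GREG_xSP] + (uSibAndRspOffset >> 8);
    else if (iBase == 5 && bMod == 0)       /* mod=0, base=5: disp32 and no base register. */
        uEffAddr += u32Disp;
    else
        uEffAddr += pau32Gprs[iBase];
    return uEffAddr;
}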
5913
5914
5915#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5916 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5917 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5918
5919#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5920 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5921 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5922
5923#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5924 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5925 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5926
5927/**
5928 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5929 *
5930 * @returns New off.
5931 * @param pReNative The native recompile state.
5932 * @param off The current code buffer offset.
5933 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5934 * bit 4 to REX.X. The two bits are part of the
5935 * REG sub-field, which isn't needed in this
5936 * function.
5937 * @param uSibAndRspOffset Two parts:
5938 * - The first 8 bits make up the SIB byte.
5939 * - The next 8 bits are the fixed RSP/ESP offset
5940 * in case of a pop [xSP].
5941 * @param u32Disp The displacement byte/word/dword, if any.
5942 * @param cbInstr The size of the fully decoded instruction. Used
5943 * for RIP relative addressing.
5944 * @param idxVarRet The result variable number.
5945 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5946 * when calculating the address.
5947 *
5948 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5949 */
5950DECL_INLINE_THROW(uint32_t)
5951iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5952 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5953{
5954 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5955
5956 /*
5957 * Special case the rip + disp32 form first.
5958 */
5959 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5960 {
5961#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5962 /* Need to take the current PC offset into account for the displacement. No need to flush here,
5963 * as the PC is only read and no branching or helper calls are involved. */
5964 u32Disp += pReNative->Core.offPc;
5965#endif
5966
5967 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5968 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5969 kIemNativeGstRegUse_ReadOnly);
5970#ifdef RT_ARCH_AMD64
5971 if (f64Bit)
5972 {
5973 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5974 if ((int32_t)offFinalDisp == offFinalDisp)
5975 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5976 else
5977 {
5978 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5979 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5980 }
5981 }
5982 else
5983 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5984
5985#elif defined(RT_ARCH_ARM64)
5986 if (f64Bit)
5987 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5988 (int64_t)(int32_t)u32Disp + cbInstr);
5989 else
5990 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5991 (int32_t)u32Disp + cbInstr);
5992
5993#else
5994# error "Port me!"
5995#endif
5996 iemNativeRegFreeTmp(pReNative, idxRegPc);
5997 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5998 return off;
5999 }
6000
6001 /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
6002 int64_t i64EffAddr = 0;
6003 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6004 {
6005 case 0: break;
6006 case 1: i64EffAddr = (int8_t)u32Disp; break;
6007 case 2: i64EffAddr = (int32_t)u32Disp; break;
6008 default: AssertFailed();
6009 }
6010
6011 /* Get the register (or SIB) value. */
6012 uint8_t idxGstRegBase = UINT8_MAX;
6013 uint8_t idxGstRegIndex = UINT8_MAX;
6014 uint8_t cShiftIndex = 0;
6015 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6016 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6017 else /* SIB: */
6018 {
6019 /* index w/ scaling. */
6020 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6021 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6022 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6023 if (idxGstRegIndex == 4)
6024 {
6025 /* no index */
6026 cShiftIndex = 0;
6027 idxGstRegIndex = UINT8_MAX;
6028 }
6029
6030 /* base */
6031 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6032 if (idxGstRegBase == 4)
6033 {
6034 /* pop [rsp] hack */
6035 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6036 }
6037 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6038 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6039 {
6040 /* mod=0 and base=5 -> disp32, no base reg. */
6041 Assert(i64EffAddr == 0);
6042 i64EffAddr = (int32_t)u32Disp;
6043 idxGstRegBase = UINT8_MAX;
6044 }
6045 }
6046
6047 /*
6048 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6049 * the start of the function.
6050 */
6051 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6052 {
6053 if (f64Bit)
6054 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6055 else
6056 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6057 return off;
6058 }
6059
6060 /*
6061 * Now emit code that calculates:
6062 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6063 * or if !f64Bit:
6064 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6065 */
6066 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6067 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6068 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6069 kIemNativeGstRegUse_ReadOnly);
6070 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6071 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6072 kIemNativeGstRegUse_ReadOnly);
6073
6074 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6075 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6076 {
6077 idxRegBase = idxRegIndex;
6078 idxRegIndex = UINT8_MAX;
6079 }
6080
6081#ifdef RT_ARCH_AMD64
6082 uint8_t bFinalAdj;
6083 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6084 bFinalAdj = 0; /* likely */
6085 else
6086 {
6087 /* pop [rsp] with a problematic disp32 value. Split out the
6088 RSP offset and add it separately afterwards (bFinalAdj). */
6089 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6090 Assert(idxGstRegBase == X86_GREG_xSP);
6091 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6092 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6093 Assert(bFinalAdj != 0);
6094 i64EffAddr -= bFinalAdj;
6095 Assert((int32_t)i64EffAddr == i64EffAddr);
6096 }
6097 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6098//pReNative->pInstrBuf[off++] = 0xcc;
6099
6100 if (idxRegIndex == UINT8_MAX)
6101 {
6102 if (u32EffAddr == 0)
6103 {
6104 /* mov ret, base */
6105 if (f64Bit)
6106 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6107 else
6108 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6109 }
6110 else
6111 {
6112 /* lea ret, [base + disp32] */
6113 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6114 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6115 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6116 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6117 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6118 | (f64Bit ? X86_OP_REX_W : 0);
6119 pbCodeBuf[off++] = 0x8d;
6120 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6121 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6122 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6123 else
6124 {
6125 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6126 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6127 }
6128 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6129 if (bMod == X86_MOD_MEM4)
6130 {
6131 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6132 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6133 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6134 }
6135 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6136 }
6137 }
6138 else
6139 {
6140 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6141 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6142 if (idxRegBase == UINT8_MAX)
6143 {
6144 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6145 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6146 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6147 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6148 | (f64Bit ? X86_OP_REX_W : 0);
6149 pbCodeBuf[off++] = 0x8d;
6150 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6151 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6152 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6153 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6154 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6155 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6156 }
6157 else
6158 {
6159 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6160 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6161 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6162 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6163 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6164 | (f64Bit ? X86_OP_REX_W : 0);
6165 pbCodeBuf[off++] = 0x8d;
6166 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6167 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6168 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6169 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6170 if (bMod != X86_MOD_MEM0)
6171 {
6172 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6173 if (bMod == X86_MOD_MEM4)
6174 {
6175 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6176 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6177 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6178 }
6179 }
6180 }
6181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6182 }
6183
6184 if (!bFinalAdj)
6185 { /* likely */ }
6186 else
6187 {
6188 Assert(f64Bit);
6189 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6190 }
6191
6192#elif defined(RT_ARCH_ARM64)
6193 if (i64EffAddr == 0)
6194 {
6195 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6196 if (idxRegIndex == UINT8_MAX)
6197 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6198 else if (idxRegBase != UINT8_MAX)
6199 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6200 f64Bit, false /*fSetFlags*/, cShiftIndex);
6201 else
6202 {
6203 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6204 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6205 }
6206 }
6207 else
6208 {
6209 if (f64Bit)
6210 { /* likely */ }
6211 else
6212 i64EffAddr = (int32_t)i64EffAddr;
6213
6214 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6215 {
6216 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6217 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6218 }
6219 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6220 {
6221 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6222 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6223 }
6224 else
6225 {
6226 if (f64Bit)
6227 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6228 else
6229 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6230 if (idxRegBase != UINT8_MAX)
6231 {
6232 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6233 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6234 }
6235 }
6236 if (idxRegIndex != UINT8_MAX)
6237 {
6238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6239 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6240 f64Bit, false /*fSetFlags*/, cShiftIndex);
6241 }
6242 }
6243
6244#else
6245# error "port me"
6246#endif
6247
6248 if (idxRegIndex != UINT8_MAX)
6249 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6250 if (idxRegBase != UINT8_MAX)
6251 iemNativeRegFreeTmp(pReNative, idxRegBase);
6252 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6253 return off;
6254}
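/*
 * A minimal standalone sketch of the RIP relative special case handled first in the
 * 64-bit path above (hypothetical helper, not part of the recompiler): the displacement
 * is relative to the address of the next instruction, hence the decoded instruction
 * length is added on top of the sign extended disp32.
 */
static uint64_t iemNativeSketchCalcRipRelAddr(uint64_t uRipOfInstr, uint32_t u32Disp, uint8_t cbInstr, bool f64Bit)
{
    uint64_t const uEffAddr = uRipOfInstr + cbInstr + (uint64_t)(int64_t)(int32_t)u32Disp;
    return f64Bit ? uEffAddr : (uint32_t)uEffAddr; /* truncated to 32 bits for a 32-bit address size */
}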
6255
6256
6257/*********************************************************************************************************************************
6258* Memory fetches and stores common *
6259*********************************************************************************************************************************/
6260
6261typedef enum IEMNATIVEMITMEMOP
6262{
6263 kIemNativeEmitMemOp_Store = 0,
6264 kIemNativeEmitMemOp_Fetch,
6265 kIemNativeEmitMemOp_Fetch_Zx_U16,
6266 kIemNativeEmitMemOp_Fetch_Zx_U32,
6267 kIemNativeEmitMemOp_Fetch_Zx_U64,
6268 kIemNativeEmitMemOp_Fetch_Sx_U16,
6269 kIemNativeEmitMemOp_Fetch_Sx_U32,
6270 kIemNativeEmitMemOp_Fetch_Sx_U64
6271} IEMNATIVEMITMEMOP;
6272
6273/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6274 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6275 * (with iSegReg = UINT8_MAX). */
6276DECL_INLINE_THROW(uint32_t)
6277iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6278 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6279 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6280{
6281 /*
6282 * Assert sanity.
6283 */
6284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6285 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6286 Assert( enmOp != kIemNativeEmitMemOp_Store
6287 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6288 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6289 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6290 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6291 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6292 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6293 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6294 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6295#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6296 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6297 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6298#else
6299 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6300#endif
6301 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6302 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6303#ifdef VBOX_STRICT
6304 if (iSegReg == UINT8_MAX)
6305 {
6306 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6307 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6308 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6309 switch (cbMem)
6310 {
6311 case 1:
6312 Assert( pfnFunction
6313 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6314 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6315 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6316 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6317 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6318 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6319 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6320 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6321 : UINT64_C(0xc000b000a0009000) ));
6322 Assert(!fAlignMaskAndCtl);
6323 break;
6324 case 2:
6325 Assert( pfnFunction
6326 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6327 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6328 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6329 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6330 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6331 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6332 : UINT64_C(0xc000b000a0009000) ));
6333 Assert(fAlignMaskAndCtl <= 1);
6334 break;
6335 case 4:
6336 Assert( pfnFunction
6337 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6338 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6339 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6340 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6341 : UINT64_C(0xc000b000a0009000) ));
6342 Assert(fAlignMaskAndCtl <= 3);
6343 break;
6344 case 8:
6345 Assert( pfnFunction
6346 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6347 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6348 : UINT64_C(0xc000b000a0009000) ));
6349 Assert(fAlignMaskAndCtl <= 7);
6350 break;
6351#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6352 case sizeof(RTUINT128U):
6353 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6354 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6355 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6356 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6357 || ( enmOp == kIemNativeEmitMemOp_Store
6358 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6359 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6360 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6361 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6362 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6363 : fAlignMaskAndCtl <= 15);
6364 break;
6365 case sizeof(RTUINT256U):
6366 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6367 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6368 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6369 || ( enmOp == kIemNativeEmitMemOp_Store
6370 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6371 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6372 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6373 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6374 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6375 : fAlignMaskAndCtl <= 31);
6376 break;
6377#endif
6378 }
6379 }
6380 else
6381 {
6382 Assert(iSegReg < 6);
6383 switch (cbMem)
6384 {
6385 case 1:
6386 Assert( pfnFunction
6387 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6388 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6389 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6390 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6391 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6392 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6393 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6394 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6395 : UINT64_C(0xc000b000a0009000) ));
6396 Assert(!fAlignMaskAndCtl);
6397 break;
6398 case 2:
6399 Assert( pfnFunction
6400 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6401 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6402 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6403 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6404 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6405 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6406 : UINT64_C(0xc000b000a0009000) ));
6407 Assert(fAlignMaskAndCtl <= 1);
6408 break;
6409 case 4:
6410 Assert( pfnFunction
6411 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6412 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6413 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6414 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6415 : UINT64_C(0xc000b000a0009000) ));
6416 Assert(fAlignMaskAndCtl <= 3);
6417 break;
6418 case 8:
6419 Assert( pfnFunction
6420 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6421 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6422 : UINT64_C(0xc000b000a0009000) ));
6423 Assert(fAlignMaskAndCtl <= 7);
6424 break;
6425#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6426 case sizeof(RTUINT128U):
6427 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6428 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6429 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6430 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6431 || ( enmOp == kIemNativeEmitMemOp_Store
6432 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6433 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6434 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6435 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6436 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6437 : fAlignMaskAndCtl <= 15);
6438 break;
6439 case sizeof(RTUINT256U):
6440 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6441 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6442 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6443 || ( enmOp == kIemNativeEmitMemOp_Store
6444 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6445 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6446 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6447 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6448 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6449 : fAlignMaskAndCtl <= 31);
6450 break;
6451#endif
6452 }
6453 }
6454#endif
6455
6456#ifdef VBOX_STRICT
6457 /*
6458 * Check that the fExec flags we've got make sense.
6459 */
6460 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6461#endif
6462
6463 /*
6464 * To keep things simple we have to commit any pending writes first as we
6465 * may end up making calls.
6466 */
6467 /** @todo we could postpone this till we make the call and reload the
6468 * registers after returning from the call. Not sure if that's sensible or
6469 * not, though. */
6470#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6471 off = iemNativeRegFlushPendingWrites(pReNative, off);
6472#else
6473 /* The program counter is treated differently for now. */
6474 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6475#endif
6476
6477#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6478 /*
6479 * Move/spill/flush stuff out of call-volatile registers.
6480 * This is the easy way out. We could contain this to the tlb-miss branch
6481     * This is the easy way out; alternatively we could confine this to the TLB-miss branch
6482     * by saving and restoring the active registers there instead.
6483 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6484#endif
6485
6486 /*
6487 * Define labels and allocate the result register (trying for the return
6488 * register if we can).
6489 */
6490 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6491#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6492 uint8_t idxRegValueFetch = UINT8_MAX;
6493
6494 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6495 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6496 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6497 else
6498 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6499 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6500 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6501 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6502#else
6503 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6504 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6505 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6506 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6507#endif
6508 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6509
6510#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6511 uint8_t idxRegValueStore = UINT8_MAX;
6512
6513 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6514 idxRegValueStore = !TlbState.fSkip
6515 && enmOp == kIemNativeEmitMemOp_Store
6516 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6517 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6518 : UINT8_MAX;
6519 else
6520 idxRegValueStore = !TlbState.fSkip
6521 && enmOp == kIemNativeEmitMemOp_Store
6522 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6523 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6524 : UINT8_MAX;
6525
6526#else
6527 uint8_t const idxRegValueStore = !TlbState.fSkip
6528 && enmOp == kIemNativeEmitMemOp_Store
6529 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6530 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6531 : UINT8_MAX;
6532#endif
6533 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6534 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6535 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6536 : UINT32_MAX;
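    /* Note: preferring IEMNATIVE_CALL_RET_GREG for the fetch result is a small optimisation;
       the TlbMiss helper returns its value in that register, so the move after the call
       (see further down) can usually be skipped. */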
6537
6538 /*
6539 * Jump to the TLB lookup code.
6540 */
6541 if (!TlbState.fSkip)
6542 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
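    /* Rough layout of what gets emitted from here on when the lookup isn't skipped (sketch):
     *
     *         jmp   TlbLookup              ; emitted just above
     *     TlbMiss:
     *         <save volatiles, set up arguments, call pfnFunction, restore state>
     *         jmp   TlbDone
     *     TlbLookup:
     *         <inline TLB probe, jumping back to TlbMiss on a miss>
     *         <inline load/store through the host mapping>
     *     TlbDone:
     *         <continue with the rest of the translation block>
     */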
6543
6544 /*
6545 * TlbMiss:
6546 *
6547      * Call the helper to do the fetching or storing.
6548 * We flush all guest register shadow copies here.
6549 */
6550 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6551
6552#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6553 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6554#else
6555 RT_NOREF(idxInstr);
6556#endif
6557
6558#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6559 if (pReNative->Core.offPc)
6560 {
6561 /*
6562 * Update the program counter but restore it at the end of the TlbMiss branch.
6563      * This should allow delaying more program counter updates for the TlbLookup and hit paths,
6564      * which are hopefully much more frequent, reducing the number of memory accesses.
6565 */
6566 /* Allocate a temporary PC register. */
6567 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6568
6569 /* Perform the addition and store the result. */
6570 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6571 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6572
6573 /* Free and flush the PC register. */
6574 iemNativeRegFreeTmp(pReNative, idxPcReg);
6575 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6576 }
6577#endif
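    /* Net effect of this block and its counterpart after the call (sketch): the TlbMiss path
     * temporarily materialises the up-to-date RIP for the helper and undoes it afterwards,
     * roughly equivalent to:
     *
     *     pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;    // before calling pfnFunction
     *     ... helper call ...
     *     pVCpu->cpum.GstCtx.rip -= pReNative->Core.offPc;    // restored once the call returns
     *
     * so the (hopefully hotter) TLB-hit path can keep postponing the RIP update. */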
6578
6579#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6580 /* Save variables in volatile registers. */
6581 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6582 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6583 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6584 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6585#endif
6586
6587 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6588 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6589#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6590 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6591 {
6592 /*
6593 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6594 *
6595         * @note A host register was assigned to the variable for the TlbLookup case above; it must not be
6596         *       freed here, or the value the helper places on the stack will not get synced into that
6597         *       register further down the road, because the variable would no longer know it has a register assigned.
6598 *
6599 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6600 * as it will be overwritten anyway.
6601 */
6602 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6603 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6604 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6605 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6606 }
6607 else
6608#endif
6609 if (enmOp == kIemNativeEmitMemOp_Store)
6610 {
6611 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6612 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6613#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6614 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6615#else
6616 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6617 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6618#endif
6619 }
6620
6621 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6622 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6623#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6624 fVolGregMask);
6625#else
6626 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6627#endif
6628
6629 if (iSegReg != UINT8_MAX)
6630 {
6631 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6632 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6633 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6634 }
6635
6636 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6637 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6638
6639 /* Done setting up parameters, make the call. */
6640 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
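    /* Summary of the argument layout set up above (the helper prototypes live elsewhere, so
     * treat this sketch as an inference from the loads above rather than the authoritative
     * signatures):
     *     ARG0 = pVCpu
     *     ARG1 = GCPtrMem (with offDisp folded in)
     *     ARG2 = iSegReg for segmented accesses, or the value for flat stores
     *     ARG3 = the value for segmented stores
     * i.e. conceptually:
     *     uValue = pfnFlatFetch(pVCpu, GCPtrMem);             // flat fetch
     *     pfnFlatStore(pVCpu, GCPtrMem, uValue);              // flat store
     *     uValue = pfnSegFetch(pVCpu, GCPtrMem, iSegReg);     // segmented fetch
     *     pfnSegStore(pVCpu, GCPtrMem, iSegReg, uValue);      // segmented store
     * For 128/256-bit accesses a pointer to the value's stack copy is passed in the value
     * argument's place instead (see the SIMD branch above). */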
6641
6642 /*
6643 * Put the result in the right register if this is a fetch.
6644 */
6645 if (enmOp != kIemNativeEmitMemOp_Store)
6646 {
6647#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6648 if ( cbMem == sizeof(RTUINT128U)
6649 || cbMem == sizeof(RTUINT256U))
6650 {
6651 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6652
6653 /* Sync the value on the stack with the host register assigned to the variable. */
6654 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6655 }
6656 else
6657#endif
6658 {
6659 Assert(idxRegValueFetch == pVarValue->idxReg);
6660 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6661 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6662 }
6663 }
6664
6665#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6666 /* Restore variables and guest shadow registers to volatile registers. */
6667 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6668 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6669#endif
6670
6671#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6672 if (pReNative->Core.offPc)
6673 {
6674 /*
6675 * Time to restore the program counter to its original value.
6676 */
6677 /* Allocate a temporary PC register. */
6678 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6679
6680 /* Restore the original value. */
6681 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6682 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6683
6684 /* Free and flush the PC register. */
6685 iemNativeRegFreeTmp(pReNative, idxPcReg);
6686 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6687 }
6688#endif
6689
6690#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6691 if (!TlbState.fSkip)
6692 {
6693 /* end of TlbMiss - Jump to the done label. */
6694 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6695 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6696
6697 /*
6698 * TlbLookup:
6699 */
6700 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6701 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6702 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6703
6704 /*
6705 * Emit code to do the actual storing / fetching.
6706 */
6707 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6708# ifdef IEM_WITH_TLB_STATISTICS
6709 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6710 enmOp == kIemNativeEmitMemOp_Store
6711                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6712                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6713# endif
6714 switch (enmOp)
6715 {
6716 case kIemNativeEmitMemOp_Store:
6717 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6718 {
6719 switch (cbMem)
6720 {
6721 case 1:
6722 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6723 break;
6724 case 2:
6725 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6726 break;
6727 case 4:
6728 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6729 break;
6730 case 8:
6731 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6732 break;
6733#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6734 case sizeof(RTUINT128U):
6735 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6736 break;
6737 case sizeof(RTUINT256U):
6738 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6739 break;
6740#endif
6741 default:
6742 AssertFailed();
6743 }
6744 }
6745 else
6746 {
6747 switch (cbMem)
6748 {
6749 case 1:
6750 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6751 idxRegMemResult, TlbState.idxReg1);
6752 break;
6753 case 2:
6754 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6755 idxRegMemResult, TlbState.idxReg1);
6756 break;
6757 case 4:
6758 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6759 idxRegMemResult, TlbState.idxReg1);
6760 break;
6761 case 8:
6762 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6763 idxRegMemResult, TlbState.idxReg1);
6764 break;
6765 default:
6766 AssertFailed();
6767 }
6768 }
6769 break;
6770
6771 case kIemNativeEmitMemOp_Fetch:
6772 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6773 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6774 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6775 switch (cbMem)
6776 {
6777 case 1:
6778 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6779 break;
6780 case 2:
6781 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6782 break;
6783 case 4:
6784 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6785 break;
6786 case 8:
6787 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6788 break;
6789#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6790 case sizeof(RTUINT128U):
6791 /*
6792 * No need to sync back the register with the stack, this is done by the generic variable handling
6793 * code if there is a register assigned to a variable and the stack must be accessed.
6794 */
6795 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6796 break;
6797 case sizeof(RTUINT256U):
6798 /*
6799 * No need to sync back the register with the stack, this is done by the generic variable handling
6800 * code if there is a register assigned to a variable and the stack must be accessed.
6801 */
6802 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6803 break;
6804#endif
6805 default:
6806 AssertFailed();
6807 }
6808 break;
6809
6810 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6811 Assert(cbMem == 1);
6812 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6813 break;
6814
6815 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6816 Assert(cbMem == 1 || cbMem == 2);
6817 if (cbMem == 1)
6818 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6819 else
6820 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6821 break;
6822
6823 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6824 switch (cbMem)
6825 {
6826 case 1:
6827 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6828 break;
6829 case 2:
6830 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6831 break;
6832 case 4:
6833 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6834 break;
6835 default:
6836 AssertFailed();
6837 }
6838 break;
6839
6840 default:
6841 AssertFailed();
6842 }
6843
6844 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6845
6846 /*
6847 * TlbDone:
6848 */
6849 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6850
6851 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6852
6853# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6854 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6855 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6856# endif
6857 }
6858#else
6859 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6860#endif
6861
6862 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6863 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6864 return off;
6865}
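/* A note on the fAlignMaskAndCtl values used by the macros below: the low 8 bits form the
 * alignment mask applied to the guest address, while IEM_MEMMAP_F_ALIGN_GP and
 * IEM_MEMMAP_F_ALIGN_SSE are control flags selecting the misalignment exception behaviour
 * (their precise semantics are defined where those flags are declared, not here). Two
 * typical encodings, taken straight from the macros in this file:
 *
 *     uint32_t const fAlignPlain = sizeof(uint64_t) - 1;          // naturally aligned U64 access
 *     uint32_t const fAlignSse   = (sizeof(RTUINT128U) - 1U)      // 16-byte aligned SSE access,
 *                                | IEM_MEMMAP_F_ALIGN_GP          // as used by
 *                                | IEM_MEMMAP_F_ALIGN_SSE;        // IEM_MC_FETCH_MEM_U128_ALIGN_SSE
 *
 * (fAlignPlain and fAlignSse are illustrative names only.) */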
6866
6867
6868
6869/*********************************************************************************************************************************
6870* Memory fetches (IEM_MEM_FETCH_XXX). *
6871*********************************************************************************************************************************/
6872
6873/* 8-bit segmented: */
6874#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6876 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6877 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6878
6879#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6880 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6881 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6882 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6883
6884#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6885 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6886 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6887 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6888
6889#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6890 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6891 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6892 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6893
6894#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6895 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6896 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6897 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6898
6899#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6900 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6901 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6902 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6903
6904#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6905 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6906 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6907 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6908
6909/* 16-bit segmented: */
6910#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6911 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6912 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6913 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6914
6915#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6917 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6918 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6919
6920#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6921 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6922 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6923 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6924
6925#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6927 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6928 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6929
6930#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6932 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6933 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6934
6935#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6936 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6937 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6938 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6939
6940
6941/* 32-bit segmented: */
6942#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6944 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6945 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6946
6947#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6949 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6950 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6951
6952#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6953 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6954 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6955 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6956
6957#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6959 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6960 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6961
6962#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6963 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6964 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6965 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6966
6967#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6968 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6969 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6970 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6971
6972#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6973 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6974 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6975 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6976
6977AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6978#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6980 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6981 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6982
6983
6984/* 64-bit segmented: */
6985#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6986 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6987 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6988 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6989
6990AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
6991#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
6992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
6993 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6994 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6995
6996
6997/* 8-bit flat: */
6998#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
6999 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7000 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7001 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7002
7003#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7004 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7005 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7006 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7007
7008#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7009 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7010 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7011 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7012
7013#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7015 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7016 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7017
7018#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7019 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7020 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7021 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7022
7023#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7025 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7026 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7027
7028#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7029 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7030 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7031 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7032
7033
7034/* 16-bit flat: */
7035#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7037 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7038 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7039
7040#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7042 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7043 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7044
7045#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7046 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7047 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7048 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7049
7050#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7051 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7052 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7053 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7054
7055#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7056 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7057 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7058 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7059
7060#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7061 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7062 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7063 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7064
7065/* 32-bit flat: */
7066#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7067 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7068 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7069 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7070
7071#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7072 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7073 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7074 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7075
7076#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7077 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7078 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7079 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7080
7081#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7083 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7084 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7085
7086#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7088 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7089 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7090
7091#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7092 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7093 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7094 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7095
7096#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7097 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7098 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7099 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7100
7101#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7102 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7103 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7104 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7105
7106
7107/* 64-bit flat: */
7108#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7110 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7111 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7112
7113#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7114 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7115 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7116 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7117
7118#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7119/* 128-bit segmented: */
7120#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7122 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7123 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7124
7125#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7126 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7127 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7128 kIemNativeEmitMemOp_Fetch, \
7129 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7130
7131AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7132#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7133 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7134 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7135 kIemNativeEmitMemOp_Fetch, \
7136 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7137
7138#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7139 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7140 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7141 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7142
7143/* 128-bit flat: */
7144#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7145 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7146 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7147 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7148
7149#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7150 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7151 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7152 kIemNativeEmitMemOp_Fetch, \
7153 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7154
7155#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7156 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7157 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7158 kIemNativeEmitMemOp_Fetch, \
7159 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7160
7161#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7162 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7163 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7164 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7165
7166/* 256-bit segmented: */
7167#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7168 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7169 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7170 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7171
7172#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7173 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7174 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7175 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7176
7177#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7178 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7179 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7180 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7181
7182
7183/* 256-bit flat: */
7184#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7185 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7186 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7187 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7188
7189#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7190 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7191 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7192 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7193
7194#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7195 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7196 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7197 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7198#endif
7199
7200
7201/*********************************************************************************************************************************
7202* Memory stores (IEM_MEM_STORE_XXX). *
7203*********************************************************************************************************************************/
7204
7205#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7206 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7207 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7208 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7209
7210#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7211 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7212 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7213 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7214
7215#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7217 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7218 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7219
7220#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7221 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7222 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7223 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7224
7225
7226#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7227 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7228 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7229 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7230
7231#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7233 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7234 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7235
7236#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7237 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7238 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7239 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7240
7241#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7242 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7243 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7244 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7245
7246
7247#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7248 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7249 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7250
7251#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7252 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7253 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7254
7255#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7256 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7257 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7258
7259#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7260 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7261 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7262
7263
7264#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7265 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7266 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7267
7268#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7269 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7270 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7271
7272#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7273 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7274 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7275
7276#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7277 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7278 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7279
7280/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7281 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7282DECL_INLINE_THROW(uint32_t)
7283iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7284 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7285{
7286 /*
7287 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7288 * to do the grunt work.
7289 */
7290 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7291 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7292 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7293 pfnFunction, idxInstr);
7294 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7295 return off;
7296}
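/* For instance, IEM_MC_STORE_MEM_U32_CONST above funnels into this helper roughly like so
 * (sketch of the expansion; a_u32ConstValue, a_iSeg and a_GCPtrMem are the macro arguments):
 *
 *     uint8_t const idxVarConst = iemNativeVarAllocConst(pReNative, sizeof(uint32_t), a_u32ConstValue);
 *     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConst, a_iSeg, a_GCPtrMem,
 *                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store,
 *                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr);
 *     iemNativeVarFreeLocal(pReNative, idxVarConst);
 */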
7297
7298
7299#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7300# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7301 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7302 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7303 kIemNativeEmitMemOp_Store, \
7304 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7305
7306# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7307 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7308 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7309 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7310
7311# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7312 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7313 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7314 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7315
7316# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7317 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7318 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7319 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7320
7321
7322# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7323 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7324 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7325 kIemNativeEmitMemOp_Store, \
7326 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7327
7328# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7329 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7330 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7331 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7332
7333# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7334 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7335 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7336 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7337
7338# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7339 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7340 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7341 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7342#endif
7343
7344
7345
7346/*********************************************************************************************************************************
7347* Stack Accesses. *
7348*********************************************************************************************************************************/
7349/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
7350#define IEM_MC_PUSH_U16(a_u16Value) \
7351 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7352 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7353#define IEM_MC_PUSH_U32(a_u32Value) \
7354 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7355 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7356#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7357 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7358 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7359#define IEM_MC_PUSH_U64(a_u64Value) \
7360 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7361 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7362
7363#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7364 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7365 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7366#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7367 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7368 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7369#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7370 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7371 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7372
7373#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7374 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7375 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7376#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7377 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7378 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
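/* The packed cBitsVarAndFlat parameter is decoded by iemNativeEmitStackPush below as
 * (mirroring the RT_BYTE1/2/3 uses in the function):
 *
 *     uint8_t const cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8;   // operand size in bytes
 *     uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat);       // 0 = segmented stack, 32/64 = flat
 *     bool    const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;  // pushing a segment register value
 *
 * so e.g. IEM_MC_FLAT64_PUSH_U16 passes RT_MAKE_U32_FROM_U8(16, 64, 0, 0), i.e. a 16-bit
 * push onto a flat 64-bit stack. (Sketch for orientation only.) */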
7379
7380
7381/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7382DECL_INLINE_THROW(uint32_t)
7383iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7384 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7385{
7386 /*
7387 * Assert sanity.
7388 */
7389 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7390 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7391#ifdef VBOX_STRICT
7392 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7393 {
7394 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7395 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7396 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7397 Assert( pfnFunction
7398 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7399 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7400 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7401 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7402 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7403 : UINT64_C(0xc000b000a0009000) ));
7404 }
7405 else
7406 Assert( pfnFunction
7407 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7408 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7409 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7410 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7411 : UINT64_C(0xc000b000a0009000) ));
7412#endif
7413
7414#ifdef VBOX_STRICT
7415 /*
7416 * Check that the fExec flags we've got make sense.
7417 */
7418 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7419#endif
7420
7421 /*
7422 * To keep things simple we have to commit any pending writes first as we
7423 * may end up making calls.
7424 */
7425 /** @todo we could postpone this till we make the call and reload the
7426 * registers after returning from the call. Not sure if that's sensible or
7427 * not, though. */
7428 off = iemNativeRegFlushPendingWrites(pReNative, off);
7429
7430 /*
7431 * First we calculate the new RSP and the effective stack pointer value.
7432 * For 64-bit mode and flat 32-bit these two are the same.
7433 * (Code structure is very similar to that of PUSH)
7434 */
7435 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7436 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7437 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
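    /* Note: On Intel CPUs a segment register push with a wider operand only stores
       16 bits when outside 16-bit mode, leaving the rest of the stack slot untouched,
       which is why cbMemAccess is narrowed below; in 16-bit mode the full width is
       kept so the real mode EFLAGS merge in the TLB-hit code further down can be
       applied. */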
7438 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7439 ? cbMem : sizeof(uint16_t);
7440 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7441 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7442 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7443 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7444 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7445 if (cBitsFlat != 0)
7446 {
7447 Assert(idxRegEffSp == idxRegRsp);
7448 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7449 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7450 if (cBitsFlat == 64)
7451 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7452 else
7453 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7454 }
7455 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7456 {
7457 Assert(idxRegEffSp != idxRegRsp);
7458 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7459 kIemNativeGstRegUse_ReadOnly);
7460#ifdef RT_ARCH_AMD64
7461 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7462#else
7463 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7464#endif
7465 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7466 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7467 offFixupJumpToUseOtherBitSp = off;
7468 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7469 {
7470 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7471 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7472 }
7473 else
7474 {
7475 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7476 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7477 }
7478 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7479 }
7480 /* SpUpdateEnd: */
7481 uint32_t const offLabelSpUpdateEnd = off;
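    /* Note: In the non-flat case the conditional jump fixed up via
       offFixupJumpToUseOtherBitSp targets the alternative Use16BitSp/Use32BitSp block
       emitted after the TLB lookup jump below; that block updates RSP/EffSp for the
       other stack width and then jumps back to this SpUpdateEnd position, so both
       paths continue with the same TLB lookup / TlbMiss dispatch. */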
7482
7483 /*
7484 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7485 * we're skipping lookup).
7486 */
7487 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7488 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7489 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7490 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7491 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7492 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7493 : UINT32_MAX;
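    /* Note: ARG2 is preferred for the value register here so that the TlbMiss call
       setup below can often pass it straight through as the second helper argument. */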
7494 uint8_t const idxRegValue = !TlbState.fSkip
7495 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7496 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7497 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7498 : UINT8_MAX;
7499 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7500
7501
7502 if (!TlbState.fSkip)
7503 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7504 else
7505 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7506
7507 /*
7508 * Use16BitSp:
7509 */
7510 if (cBitsFlat == 0)
7511 {
7512#ifdef RT_ARCH_AMD64
7513 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7514#else
7515 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7516#endif
7517 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7518 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7519 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7520 else
7521 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7522 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7523 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7524 }
7525
7526 /*
7527 * TlbMiss:
7528 *
7529 * Call helper to do the pushing.
7530 */
7531 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7532
7533#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7534 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7535#else
7536 RT_NOREF(idxInstr);
7537#endif
7538
7539 /* Save variables in volatile registers. */
7540 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7541 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7542 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7543 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7544 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7545
7546 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7547 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7548 {
7549 /* Swap them using ARG0 as temp register: */
7550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7551 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7552 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7553 }
7554 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7555 {
7556 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7557 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7558 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7559
7560 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7561 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7562 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7563 }
7564 else
7565 {
7566 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7568
7569 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7570 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7571 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7572 }
7573
7574 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7575 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7576
7577 /* Done setting up parameters, make the call. */
7578 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7579
7580 /* Restore variables and guest shadow registers to volatile registers. */
7581 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7582 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7583
7584#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7585 if (!TlbState.fSkip)
7586 {
7587 /* end of TlbMiss - Jump to the done label. */
7588 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7589 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7590
7591 /*
7592 * TlbLookup:
7593 */
7594 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7595 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7596
7597 /*
7598 * Emit code to do the actual storing / fetching.
7599 */
7600 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7601# ifdef IEM_WITH_TLB_STATISTICS
7602 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7603 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7604# endif
7605 if (idxRegValue != UINT8_MAX)
7606 {
7607 switch (cbMemAccess)
7608 {
7609 case 2:
7610 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7611 break;
7612 case 4:
7613 if (!fIsIntelSeg)
7614 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7615 else
7616 {
7617                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
7618                           PUSH FS in real mode, so we have to try to emulate that here.
7619 We borrow the now unused idxReg1 from the TLB lookup code here. */
7620 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7621 kIemNativeGstReg_EFlags);
7622 if (idxRegEfl != UINT8_MAX)
7623 {
7624#ifdef RT_ARCH_AMD64
7625 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7626 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7627 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7628#else
7629 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7630 off, TlbState.idxReg1, idxRegEfl,
7631 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7632#endif
7633 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7634 }
7635 else
7636 {
7637 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7638 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7639 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7640 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7641 }
7642 /* ASSUMES the upper half of idxRegValue is ZERO. */
7643 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7644 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7645 }
7646 break;
7647 case 8:
7648 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7649 break;
7650 default:
7651 AssertFailed();
7652 }
7653 }
7654 else
7655 {
7656 switch (cbMemAccess)
7657 {
7658 case 2:
7659 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7660 idxRegMemResult, TlbState.idxReg1);
7661 break;
7662 case 4:
7663 Assert(!fIsSegReg);
7664 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7665 idxRegMemResult, TlbState.idxReg1);
7666 break;
7667 case 8:
7668 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7669 break;
7670 default:
7671 AssertFailed();
7672 }
7673 }
7674
7675 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7676 TlbState.freeRegsAndReleaseVars(pReNative);
7677
7678 /*
7679 * TlbDone:
7680 *
7681 * Commit the new RSP value.
7682 */
7683 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7684 }
7685#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7686
7687#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7688 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7689#endif
7690 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7691 if (idxRegEffSp != idxRegRsp)
7692 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7693
7694    /* The value variable is implicitly flushed. */
7695 if (idxRegValue != UINT8_MAX)
7696 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7697 iemNativeVarFreeLocal(pReNative, idxVarValue);
7698
7699 return off;
7700}
7701
7702
7703
7704/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
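/* Note: RT_MAKE_U32_FROM_U8 packs its four arguments as the bytes of a uint32_t, so
   e.g. IEM_MC_FLAT64_POP_GREG_U16 passes RT_MAKE_U32_FROM_U8(16, 64, 0, 0) and the
   emitter below recovers the operand width via RT_BYTE1 (16 bits -> cbMem = 2) and
   the flat stack width via RT_BYTE2 (64); the push emitter above additionally uses
   the third byte as the segment-register flag. */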
7705#define IEM_MC_POP_GREG_U16(a_iGReg) \
7706 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7707 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7708#define IEM_MC_POP_GREG_U32(a_iGReg) \
7709 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7710 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7711#define IEM_MC_POP_GREG_U64(a_iGReg) \
7712 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7713 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7714
7715#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7716 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7717 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7718#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7719 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7720 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7721
7722#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7723 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7724 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7725#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7726 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7727 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7728
7729
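/** Emits the 16-bit stack pointer variant of the POP SP update: idxRegEffSp receives
 *  the current 16-bit SP and the low word of idxRegRsp is advanced by cbMem with
 *  16-bit wrap-around, leaving RSP bits 63:16 untouched. */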
7730DECL_FORCE_INLINE_THROW(uint32_t)
7731iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7732 uint8_t idxRegTmp)
7733{
7734 /* Use16BitSp: */
7735#ifdef RT_ARCH_AMD64
7736 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7737 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7738 RT_NOREF(idxRegTmp);
7739#else
7740 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7741 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7742 /* add tmp, regrsp, #cbMem */
7743 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7744 /* and tmp, tmp, #0xffff */
7745 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7746 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7747    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7748 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7749#endif
7750 return off;
7751}
7752
7753
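/** Emits the 32-bit stack pointer variant: idxRegEffSp is loaded with the 32-bit SP
 *  value and ESP is advanced by cbMem. */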
7754DECL_FORCE_INLINE(uint32_t)
7755iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7756{
7757 /* Use32BitSp: */
7758 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7759 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7760 return off;
7761}
7762
7763
7764/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7765DECL_INLINE_THROW(uint32_t)
7766iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7767 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7768{
7769 /*
7770 * Assert sanity.
7771 */
7772 Assert(idxGReg < 16);
7773#ifdef VBOX_STRICT
7774 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7775 {
7776 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7777 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7778 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7779 Assert( pfnFunction
7780 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7781 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7782 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7783 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7784 : UINT64_C(0xc000b000a0009000) ));
7785 }
7786 else
7787 Assert( pfnFunction
7788 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7789 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7790 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7791 : UINT64_C(0xc000b000a0009000) ));
7792#endif
7793
7794#ifdef VBOX_STRICT
7795 /*
7796 * Check that the fExec flags we've got make sense.
7797 */
7798 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7799#endif
7800
7801 /*
7802 * To keep things simple we have to commit any pending writes first as we
7803 * may end up making calls.
7804 */
7805 off = iemNativeRegFlushPendingWrites(pReNative, off);
7806
7807 /*
7808 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7809 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7810 * directly as the effective stack pointer.
7811 * (Code structure is very similar to that of PUSH)
7812 */
7813 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7814 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7815 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7816 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7817 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7818 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7819 * will be the resulting register value. */
7820 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7821
7822 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7823 if (cBitsFlat != 0)
7824 {
7825 Assert(idxRegEffSp == idxRegRsp);
7826 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7827 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7828 }
7829 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7830 {
7831 Assert(idxRegEffSp != idxRegRsp);
7832 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7833 kIemNativeGstRegUse_ReadOnly);
7834#ifdef RT_ARCH_AMD64
7835 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7836#else
7837 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7838#endif
7839 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7840 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7841 offFixupJumpToUseOtherBitSp = off;
7842 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7843 {
7844/** @todo can skip idxRegRsp updating when popping ESP. */
7845 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7846 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7847 }
7848 else
7849 {
7850 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7851 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7852 }
7853 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7854 }
7855 /* SpUpdateEnd: */
7856 uint32_t const offLabelSpUpdateEnd = off;
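    /* Note: Same SpUpdateEnd trampoline arrangement as in iemNativeEmitStackPush above. */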
7857
7858 /*
7859 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7860 * we're skipping lookup).
7861 */
7862 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7863 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7864 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7865 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7866 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7867 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7868 : UINT32_MAX;
7869
7870 if (!TlbState.fSkip)
7871 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7872 else
7873 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7874
7875 /*
7876 * Use16BitSp:
7877 */
7878 if (cBitsFlat == 0)
7879 {
7880#ifdef RT_ARCH_AMD64
7881 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7882#else
7883 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7884#endif
7885 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7886 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7887 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7888 else
7889 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7890 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7892 }
7893
7894 /*
7895 * TlbMiss:
7896 *
7897     * Call helper to do the popping.
7898 */
7899 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7900
7901#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7902 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7903#else
7904 RT_NOREF(idxInstr);
7905#endif
7906
7907 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7908 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7909 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7910 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7911
7912
7913 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7914 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7915 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7916
7917 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7918 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7919
7920 /* Done setting up parameters, make the call. */
7921 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7922
7923 /* Move the return register content to idxRegMemResult. */
7924 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7925 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7926
7927 /* Restore variables and guest shadow registers to volatile registers. */
7928 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7929 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7930
7931#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7932 if (!TlbState.fSkip)
7933 {
7934 /* end of TlbMiss - Jump to the done label. */
7935 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7936 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7937
7938 /*
7939 * TlbLookup:
7940 */
7941 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7942 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7943
7944 /*
7945     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
7946 */
7947 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7948# ifdef IEM_WITH_TLB_STATISTICS
7949 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7950 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7951# endif
7952 switch (cbMem)
7953 {
7954 case 2:
7955 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7956 break;
7957 case 4:
7958 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7959 break;
7960 case 8:
7961 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7962 break;
7963 default:
7964 AssertFailed();
7965 }
7966
7967 TlbState.freeRegsAndReleaseVars(pReNative);
7968
7969 /*
7970 * TlbDone:
7971 *
7972     * Set the new RSP value (FLAT accesses need to calculate it first) and
7973 * commit the popped register value.
7974 */
7975 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7976 }
7977#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7978
7979 if (idxGReg != X86_GREG_xSP)
7980 {
7981 /* Set the register. */
7982 if (cbMem >= sizeof(uint32_t))
7983 {
7984#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7985 AssertMsg( pReNative->idxCurCall == 0
7986 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
7987 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
7988#endif
7989 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
7990#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7991 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
7992#endif
7993#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7994 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
7995 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7996#endif
7997 }
7998 else
7999 {
8000 Assert(cbMem == sizeof(uint16_t));
8001 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8002 kIemNativeGstRegUse_ForUpdate);
8003 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8004#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8005 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8006#endif
8007 iemNativeRegFreeTmp(pReNative, idxRegDst);
8008 }
8009
8010 /* Complete RSP calculation for FLAT mode. */
8011 if (idxRegEffSp == idxRegRsp)
8012 {
8013 if (cBitsFlat == 64)
8014 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8015 else
8016 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8017 }
8018 }
8019 else
8020 {
8021        /* We're popping RSP, ESP or SP. Only the 16-bit case needs a bit of extra work, of course. */
8022 if (cbMem == sizeof(uint64_t))
8023 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8024 else if (cbMem == sizeof(uint32_t))
8025 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8026 else
8027 {
8028 if (idxRegEffSp == idxRegRsp)
8029 {
8030 if (cBitsFlat == 64)
8031 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8032 else
8033 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8034 }
8035 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8036 }
8037 }
8038
8039#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8040 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8041#endif
8042
8043 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8044 if (idxRegEffSp != idxRegRsp)
8045 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8046 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8047
8048 return off;
8049}
8050
8051
8052
8053/*********************************************************************************************************************************
8054* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8055*********************************************************************************************************************************/
8056
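/* Note: All of the IEM_MC_MEM_[FLAT_]MAP_* macros below forward to iemNativeEmitMemMapCommon
   with the element size, the access flags, an alignment mask and the matching TLB-miss
   helper; the FLAT variants pass UINT8_MAX instead of a segment register index. */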
8057#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8058 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8059 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8060 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8061
8062#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8063 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8064 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8065 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8066
8067#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8068 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8069 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8070 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8071
8072#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8073 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8074 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8075 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8076
8077
8078#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8079 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8080 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8081 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8082
8083#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8084 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8085 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8086 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8087
8088#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8089 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8090 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8091 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8092
8093#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8094 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8095 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8096 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8097
8098#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8099 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8100 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8101 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8102
8103
8104#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8105 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8106 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8107 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8108
8109#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8110 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8111 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8112 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8113
8114#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8115 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8116 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8117 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8118
8119#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8120 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8121 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8122 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8123
8124#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8125 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8126 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8127 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8128
8129
8130#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8132 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8133 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8134
8135#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8137 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8138 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8139#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8140 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8141 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8142 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8143
8144#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8145 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8146 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8147 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8148
8149#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8150 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8151 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8152 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8153
8154
8155#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8156 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8157 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8158 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8159
8160#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8161 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8162 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8163 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8164
8165
8166#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8167 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8168 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8169 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8170
8171#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8172 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8173 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8174 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8175
8176#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8177 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8178 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8179 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8180
8181#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8182 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8183 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8184 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8185
8186
8187
8188#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8189 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8190 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8191 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8192
8193#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8194 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8195 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8196 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8197
8198#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8199 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8200 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8201 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8202
8203#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8204 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8205 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8206 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8207
8208
8209#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8210 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8211 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8212 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8213
8214#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8215 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8216 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8217 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8218
8219#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8220 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8221 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8222 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8223
8224#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8225 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8226 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8227 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8228
8229#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8230 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8231 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8232 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8233
8234
8235#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8236 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8237 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8238 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8239
8240#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8241 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8242 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8243 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8244
8245#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8246 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8247 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8248 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8249
8250#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8251 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8252 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8253 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8254
8255#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8256 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8257 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8258 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8259
8260
8261#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8262 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8263 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8264 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8265
8266#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8267 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8268 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8269 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8270
8271#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8272 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8273 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8274 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8275
8276#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8277 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8278 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8279 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8280
8281#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8282 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8283 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8284 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8285
8286
8287#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8288 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8289 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8290 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8291
8292#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8293 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8294 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8295 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8296
8297
8298#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8299 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8300 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8301 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8302
8303#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8304 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8305 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8306 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8307
8308#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8309 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8310 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8311 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8312
8313#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8314 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8315 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8316 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8317
8318
8319DECL_INLINE_THROW(uint32_t)
8320iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8321 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8322 uintptr_t pfnFunction, uint8_t idxInstr)
8323{
8324 /*
8325 * Assert sanity.
8326 */
8327 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8328 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8329 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8330 && pVarMem->cbVar == sizeof(void *),
8331 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8332
8333 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8334 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8335 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8336 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8337 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8338
8339 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8340 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8341 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8342 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8343 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8344
8345 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8346
8347 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8348
8349#ifdef VBOX_STRICT
8350# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8351 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8352 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8353 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8354 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8355# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8356 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8357 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8358 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
8359
8360 if (iSegReg == UINT8_MAX)
8361 {
8362 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8363 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8364 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8365 switch (cbMem)
8366 {
8367 case 1:
8368 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8369 Assert(!fAlignMaskAndCtl);
8370 break;
8371 case 2:
8372 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8373 Assert(fAlignMaskAndCtl < 2);
8374 break;
8375 case 4:
8376 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8377 Assert(fAlignMaskAndCtl < 4);
8378 break;
8379 case 8:
8380 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8381 Assert(fAlignMaskAndCtl < 8);
8382 break;
8383 case 10:
8384 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8385 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8386 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8387 Assert(fAlignMaskAndCtl < 8);
8388 break;
8389 case 16:
8390 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8391 Assert(fAlignMaskAndCtl < 16);
8392 break;
8393# if 0
8394 case 32:
8395 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8396 Assert(fAlignMaskAndCtl < 32);
8397 break;
8398 case 64:
8399 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8400 Assert(fAlignMaskAndCtl < 64);
8401 break;
8402# endif
8403 default: AssertFailed(); break;
8404 }
8405 }
8406 else
8407 {
8408 Assert(iSegReg < 6);
8409 switch (cbMem)
8410 {
8411 case 1:
8412 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8413 Assert(!fAlignMaskAndCtl);
8414 break;
8415 case 2:
8416 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8417 Assert(fAlignMaskAndCtl < 2);
8418 break;
8419 case 4:
8420 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8421 Assert(fAlignMaskAndCtl < 4);
8422 break;
8423 case 8:
8424 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8425 Assert(fAlignMaskAndCtl < 8);
8426 break;
8427 case 10:
8428 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8429 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8430 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8431 Assert(fAlignMaskAndCtl < 8);
8432 break;
8433 case 16:
8434 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8435 Assert(fAlignMaskAndCtl < 16);
8436 break;
8437# if 0
8438 case 32:
8439 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8440 Assert(fAlignMaskAndCtl < 32);
8441 break;
8442 case 64:
8443 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8444 Assert(fAlignMaskAndCtl < 64);
8445 break;
8446# endif
8447 default: AssertFailed(); break;
8448 }
8449 }
8450# undef IEM_MAP_HLP_FN
8451# undef IEM_MAP_HLP_FN_NO_AT
8452#endif
8453
8454#ifdef VBOX_STRICT
8455 /*
8456 * Check that the fExec flags we've got make sense.
8457 */
8458 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8459#endif
8460
8461 /*
8462 * To keep things simple we have to commit any pending writes first as we
8463 * may end up making calls.
8464 */
8465 off = iemNativeRegFlushPendingWrites(pReNative, off);
8466
8467#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8468 /*
8469 * Move/spill/flush stuff out of call-volatile registers.
8470 * This is the easy way out. We could contain this to the tlb-miss branch
8471 * by saving and restoring active stuff here.
8472 */
8473 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8474 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8475#endif
8476
8477 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8478 while the tlb-miss codepath will temporarily put it on the stack.
8479       Set the type to stack here so we don't need to do it twice below. */
8480 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8481 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8482 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8483 * lookup is done. */
8484
8485 /*
8486 * Define labels and allocate the result register (trying for the return
8487 * register if we can).
8488 */
8489 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8490 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8491 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8492 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8493 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8494 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8495 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8496 : UINT32_MAX;
8497//off=iemNativeEmitBrk(pReNative, off, 0);
8498 /*
8499 * Jump to the TLB lookup code.
8500 */
8501 if (!TlbState.fSkip)
8502 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8503
8504 /*
8505 * TlbMiss:
8506 *
8507 * Call helper to do the fetching.
8508 * We flush all guest register shadow copies here.
8509 */
8510 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8511
8512#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8513 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8514#else
8515 RT_NOREF(idxInstr);
8516#endif
8517
8518#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8519 /* Save variables in volatile registers. */
8520 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8521 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8522#endif
8523
8524 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8525    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
8526#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8527 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8528#else
8529 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8530#endif
8531
8532 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8533 if (iSegReg != UINT8_MAX)
8534 {
8535 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8536 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8537 }
8538
8539 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8540 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8541 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8542
8543 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8544 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8545
8546 /* Done setting up parameters, make the call. */
8547 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8548
8549 /*
8550 * Put the output in the right registers.
8551 */
8552 Assert(idxRegMemResult == pVarMem->idxReg);
8553 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8554 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8555
8556#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8557 /* Restore variables and guest shadow registers to volatile registers. */
8558 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8559 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8560#endif
8561
8562 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8563 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8564
8565#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8566 if (!TlbState.fSkip)
8567 {
8568        /* end of TlbMiss - Jump to the done label. */
8569 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8570 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8571
8572 /*
8573 * TlbLookup:
8574 */
8575 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8576 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8577# ifdef IEM_WITH_TLB_STATISTICS
8578 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8579 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8580# endif
8581
8582 /* [idxVarUnmapInfo] = 0; */
8583 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8584
8585 /*
8586 * TlbDone:
8587 */
8588 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8589
8590 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8591
8592# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8593 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8594 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8595# endif
8596 }
8597#else
8598 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8599#endif
8600
8601 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8602 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8603
8604 return off;
8605}
8606
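/*
 * Note: Illustrative summary of the code layout the mapping emitter above produces
 *       when the inline TLB lookup is not skipped (labels and register choices are
 *       made at recompile time):
 *          TlbMiss:   save volatile variables, call pfnFunction (the map helper),
 *                     copy the return value into idxRegMemResult, reload the unmap
 *                     info from its stack slot, then jump to TlbDone.
 *          TlbLookup: inline TLB probe emitted by iemNativeEmitTlbLookup; on a hit
 *                     the host address ends up in idxRegMemResult and the unmap
 *                     info register is zeroed.
 *          TlbDone:   common continuation point.
 */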
8607
8608#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8609 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8610 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8611
8612#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8613 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8614 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8615
8616#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8617 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8618 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8619
8620#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8621 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8622 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8623
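/*
 * Rough shape of what the emitter below produces (AMD64 flavour, shown purely as
 * an illustrative sketch; actual registers and stack slots are assigned at
 * recompile time):
 *          test    byte [rbp - <bUnmapInfo slot>], 0ffh
 *          jz      .no_unmap_needed        ; zero means no commit/unmap work
 *          mov     <arg1>, bUnmapInfo
 *          mov     <arg0>, pVCpu
 *          call    <pfnFunction>           ; one of the iemNativeHlpMemCommitAndUnmapXxx helpers
 *      .no_unmap_needed:
 */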
8624DECL_INLINE_THROW(uint32_t)
8625iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8626 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8627{
8628 /*
8629 * Assert sanity.
8630 */
8631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8632#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8633 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8634#endif
8635 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8636 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8637 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8638#ifdef VBOX_STRICT
8639 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8640 {
8641 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8642 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8643 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8644 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8645 case IEM_ACCESS_TYPE_WRITE:
8646 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8647 case IEM_ACCESS_TYPE_READ:
8648 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8649 default: AssertFailed();
8650 }
8651#else
8652 RT_NOREF(fAccess);
8653#endif
8654
8655 /*
8656 * To keep things simple we have to commit any pending writes first as we
8657 * may end up making calls (there shouldn't be any at this point, so this
8658 * is just for consistency).
8659 */
8660 /** @todo we could postpone this till we make the call and reload the
8661 * registers after returning from the call. Not sure if that's sensible or
8662 * not, though. */
8663 off = iemNativeRegFlushPendingWrites(pReNative, off);
8664
8665 /*
8666 * Move/spill/flush stuff out of call-volatile registers.
8667 *
8668 * We exclude any register holding the bUnmapInfo variable, as we'll be
8669 * checking it after returning from the call and will free it afterwards.
8670 */
8671 /** @todo save+restore active registers and maybe guest shadows in miss
8672 * scenario. */
8673 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8674 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8675
8676 /*
8677 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8678 * to call the unmap helper function.
8679 *
8680     * The likelihood of it being zero is higher than for the TLB hit when doing
8681     * the mapping, as a TLB miss for a well aligned and unproblematic memory
8682     * access should also end up with a mapping that won't need special unmapping.
8683 */
8684 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8685 * should speed up things for the pure interpreter as well when TLBs
8686 * are enabled. */
8687#ifdef RT_ARCH_AMD64
8688 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8689 {
8690 /* test byte [rbp - xxx], 0ffh */
8691 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8692 pbCodeBuf[off++] = 0xf6;
8693 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8694 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8695 pbCodeBuf[off++] = 0xff;
8696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8697 }
8698 else
8699#endif
8700 {
8701 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8702 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8703 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8704 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8705 }
8706 uint32_t const offJmpFixup = off;
8707    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
8708
8709 /*
8710 * Call the unmap helper function.
8711 */
8712#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8713 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8714#else
8715 RT_NOREF(idxInstr);
8716#endif
8717
8718 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8719 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8720 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8721
8722 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8723 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8724
8725 /* Done setting up parameters, make the call. */
8726 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8727
8728    /* The bUnmapInfo variable is implicitly freed by these MCs. */
8729 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8730
8731 /*
8732 * Done, just fixup the jump for the non-call case.
8733 */
8734 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8735
8736 return off;
8737}
8738
8739
8740
8741/*********************************************************************************************************************************
8742* State and Exceptions *
8743*********************************************************************************************************************************/
8744
8745#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8746#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8747
8748#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8749#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8750#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8751
8752#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8753#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8754#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8755
8756
8757DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8758{
8759 /** @todo this needs a lot more work later. */
8760 RT_NOREF(pReNative, fForChange);
8761 return off;
8762}
8763
8764
8765
8766/*********************************************************************************************************************************
8767* Emitters for FPU related operations. *
8768*********************************************************************************************************************************/
8769
8770#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8771 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8772
8773/** Emits code for IEM_MC_FETCH_FCW. */
8774DECL_INLINE_THROW(uint32_t)
8775iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8776{
8777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8778 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8779
8780 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8781
8782 /* Allocate a temporary FCW register. */
8783 /** @todo eliminate extra register */
8784 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8785 kIemNativeGstRegUse_ReadOnly);
8786
8787 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8788
8789 /* Free but don't flush the FCW register. */
8790 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8791 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8792
8793 return off;
8794}
8795
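/*
 * Note: This simply copies the guest FCW shadow register into the 16-bit local
 *       with a zero-extending 16-bit move; a typical consumer would be an
 *       fnstcw-style sequence that fetches the FCW and then stores it (that
 *       instruction mapping is mentioned only as an illustration).
 */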
8796
8797#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8798 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8799
8800/** Emits code for IEM_MC_FETCH_FSW. */
8801DECL_INLINE_THROW(uint32_t)
8802iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8803{
8804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8805 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8806
8807 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8808 /* Allocate a temporary FSW register. */
8809 /** @todo eliminate extra register */
8810 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8811 kIemNativeGstRegUse_ReadOnly);
8812
8813 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8814
8815 /* Free but don't flush the FSW register. */
8816 iemNativeRegFreeTmp(pReNative, idxFswReg);
8817 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8818
8819 return off;
8820}
8821
8822
8823
8824#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8825
8826
8827/*********************************************************************************************************************************
8828* Emitters for SSE/AVX specific operations. *
8829*********************************************************************************************************************************/
8830
8831#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8832 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8833
8834/** Emits code for IEM_MC_COPY_XREG_U128. */
8835DECL_INLINE_THROW(uint32_t)
8836iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8837{
8838 /* This is a nop if the source and destination register are the same. */
8839 if (iXRegDst != iXRegSrc)
8840 {
8841 /* Allocate destination and source register. */
8842 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8843 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8844 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8845 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8846
8847 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8848
8849 /* Free but don't flush the source and destination register. */
8850 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8851 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8852 }
8853
8854 return off;
8855}
8856
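/*
 * Note: Since the destination above is allocated ForFullWrite, its previous
 *       contents are never loaded from CPUMCTX; the whole operation boils down
 *       to a single host vector register copy (or nothing at all when the source
 *       and destination are the same register).
 */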
8857
8858#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8859 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8860
8861/** Emits code for IEM_MC_FETCH_XREG_U128. */
8862DECL_INLINE_THROW(uint32_t)
8863iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8864{
8865 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8866 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8867
8868 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8869 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8870
8871 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8872
8873 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8874
8875 /* Free but don't flush the source register. */
8876 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8877 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8878
8879 return off;
8880}
8881
8882
8883#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8884 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8885
8886#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8887 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8888
8889/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
8890DECL_INLINE_THROW(uint32_t)
8891iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8892{
8893 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8894 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8895
8896 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8897 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8898
8899 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8900 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8901
8902 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8903
8904 /* Free but don't flush the source register. */
8905 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8906 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8907
8908 return off;
8909}
8910
8911
8912#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8913 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8914
8915#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8916 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8917
8918/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8919DECL_INLINE_THROW(uint32_t)
8920iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8921{
8922 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8923 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8924
8925 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8926 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8927
8928 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8929 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8930
8931 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8932
8933 /* Free but don't flush the source register. */
8934 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8935 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8936
8937 return off;
8938}
8939
8940
8941#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8942 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8943
8944/** Emits code for IEM_MC_FETCH_XREG_U16. */
8945DECL_INLINE_THROW(uint32_t)
8946iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8947{
8948 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8949 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8950
8951 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8952 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8953
8954 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8955 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8956
8957 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
8958
8959 /* Free but don't flush the source register. */
8960 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8961 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8962
8963 return off;
8964}
8965
8966
8967#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
8968 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
8969
8970/** Emits code for IEM_MC_FETCH_XREG_U8. */
8971DECL_INLINE_THROW(uint32_t)
8972iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
8973{
8974 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8975 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
8976
8977 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8978 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8979
8980 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8981 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8982
8983 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
8984
8985 /* Free but don't flush the source register. */
8986 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8987 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8988
8989 return off;
8990}
8991
8992
8993#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
8994 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
8995
8996AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8997#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
8998 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
8999
9000
9001/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9002DECL_INLINE_THROW(uint32_t)
9003iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9004{
9005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9006 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9007
9008 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9009 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
9010 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9011
9012 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9013
9014 /* Free but don't flush the source register. */
9015 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9016 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9017
9018 return off;
9019}
9020
9021
9022#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9023 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9024
9025#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9026 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9027
9028#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9029 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9030
9031#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9032 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9033
9034#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9035 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9036
9037#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9038 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9039
9040/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8/IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9041DECL_INLINE_THROW(uint32_t)
9042iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9043 uint8_t cbLocal, uint8_t iElem)
9044{
9045 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9046 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9047
9048#ifdef VBOX_STRICT
9049 switch (cbLocal)
9050 {
9051 case sizeof(uint64_t): Assert(iElem < 2); break;
9052 case sizeof(uint32_t): Assert(iElem < 4); break;
9053 case sizeof(uint16_t): Assert(iElem < 8); break;
9054 case sizeof(uint8_t): Assert(iElem < 16); break;
9055 default: AssertFailed();
9056 }
9057#endif
9058
9059 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9060 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9061 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9062
9063 switch (cbLocal)
9064 {
9065 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9066 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9067 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9068 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9069 default: AssertFailed();
9070 }
9071
9072 /* Free but don't flush the source register. */
9073 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9074 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9075
9076 return off;
9077}
9078
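/*
 * Example (illustrative only): IEM_MC_STORE_XREG_U16(2, 3, u16Value) updates
 * 16-bit element 3, i.e. bits 63:48, of guest XMM2 and leaves the other elements
 * untouched - hence the ForUpdate allocation above, which loads the old register
 * value before the element store.
 */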
9079
9080#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9081 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9082
9083/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9084DECL_INLINE_THROW(uint32_t)
9085iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9086{
9087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9088 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9089
9090 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9091 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9092 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9093
9094 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
9095 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9096 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9097
9098 /* Free but don't flush the source register. */
9099 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9100 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9101
9102 return off;
9103}
9104
9105
9106#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9107 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9108
9109/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9110DECL_INLINE_THROW(uint32_t)
9111iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9112{
9113 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9114 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9115
9116 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9117 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9118 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9119
9120 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9121 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9122 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9123
9124 /* Free but don't flush the source register. */
9125 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9126 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9127
9128 return off;
9129}
9130
9131
9132#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9133 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9134
9135/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9136DECL_INLINE_THROW(uint32_t)
9137iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9138 uint8_t idxSrcVar, uint8_t iDwSrc)
9139{
9140 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9141 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9142
9143 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9144 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9145 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9146
9147 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9148 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9149
9150 /* Free but don't flush the destination register. */
9151 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9152 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9153
9154 return off;
9155}
9156
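/*
 * Example (illustrative only): IEM_MC_STORE_XREG_U32_U128(1, 0, u128Local, 2)
 * copies dword 2 of the 128-bit local into dword 0 of guest XMM1, going through
 * IEMNATIVE_REG_FIXED_TMP0 as the intermediate GPR.
 */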
9157
9158#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9159 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9160
9161/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9162DECL_INLINE_THROW(uint32_t)
9163iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9164{
9165 /*
9166 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
9167     * it won't load the actual value from CPUMCTX. When iYRegSrc is allocated afterwards it will get duplicated from the already
9168     * allocated host register for iYRegDst, which contains garbage. This will be caught by the guest register value checking in debug builds.
9169 */
9170 if (iYRegDst != iYRegSrc)
9171 {
9172 /* Allocate destination and source register. */
9173 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9174 kIemNativeGstSimdRegLdStSz_256,
9175 kIemNativeGstRegUse_ForFullWrite);
9176 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9177 kIemNativeGstSimdRegLdStSz_Low128,
9178 kIemNativeGstRegUse_ReadOnly);
9179
9180 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9181 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9182
9183 /* Free but don't flush the source and destination register. */
9184 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9185 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9186 }
9187 else
9188 {
9189 /* This effectively only clears the upper 128-bits of the register. */
9190 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9191 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9192
9193 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9194
9195 /* Free but don't flush the destination register. */
9196 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9197 }
9198
9199 return off;
9200}
9201
9202
9203#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9204 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9205
9206/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9207DECL_INLINE_THROW(uint32_t)
9208iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9209{
9210 /*
9211 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
9212     * it won't load the actual value from CPUMCTX. When iYRegSrc is allocated afterwards it will get duplicated from the already
9213     * allocated host register for iYRegDst, which contains garbage. This will be caught by the guest register value checking in debug builds.
9214     * iYRegSrc == iYRegDst would effectively only clear the bits above 255 (of a ZMM register we don't support yet), so this is just a nop.
9215 */
9216 if (iYRegDst != iYRegSrc)
9217 {
9218 /* Allocate destination and source register. */
9219 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9220 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9221 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9222 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9223
9224 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9225
9226 /* Free but don't flush the source and destination register. */
9227 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9228 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9229 }
9230
9231 return off;
9232}
9233
9234
9235#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9236 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9237
9238/** Emits code for IEM_MC_FETCH_YREG_U128. */
9239DECL_INLINE_THROW(uint32_t)
9240iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9241{
9242 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9243 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9244
9245 Assert(iDQWord <= 1);
9246 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9247 iDQWord == 1
9248 ? kIemNativeGstSimdRegLdStSz_High128
9249 : kIemNativeGstSimdRegLdStSz_Low128,
9250 kIemNativeGstRegUse_ReadOnly);
9251
9252 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9253 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9254
9255 if (iDQWord == 1)
9256 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9257 else
9258 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9259
9260 /* Free but don't flush the source register. */
9261 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9262 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9263
9264 return off;
9265}
9266
9267
9268#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9269 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9270
9271/** Emits code for IEM_MC_FETCH_YREG_U64. */
9272DECL_INLINE_THROW(uint32_t)
9273iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9274{
9275 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9276 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9277
9278 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9279 iQWord >= 2
9280 ? kIemNativeGstSimdRegLdStSz_High128
9281 : kIemNativeGstSimdRegLdStSz_Low128,
9282 kIemNativeGstRegUse_ReadOnly);
9283
9284 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9285 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9286
9287 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9288
9289 /* Free but don't flush the source register. */
9290 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9291 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9292
9293 return off;
9294}
9295
9296
9297#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9298 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9299
9300/** Emits code for IEM_MC_FETCH_YREG_U32. */
9301DECL_INLINE_THROW(uint32_t)
9302iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9303{
9304 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9305 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9306
9307 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9308 iDWord >= 4
9309 ? kIemNativeGstSimdRegLdStSz_High128
9310 : kIemNativeGstSimdRegLdStSz_Low128,
9311 kIemNativeGstRegUse_ReadOnly);
9312
9313 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9314 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9315
9316 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9317
9318 /* Free but don't flush the source register. */
9319 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9320 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9321
9322 return off;
9323}
9324
9325
9326#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9327 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9328
9329/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9330DECL_INLINE_THROW(uint32_t)
9331iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9332{
9333 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9334 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9335
9336 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9337
9338 /* Free but don't flush the register. */
9339 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9340
9341 return off;
9342}
9343
9344
9345#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9346 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9347
9348/** Emits code for IEM_MC_STORE_YREG_U128. */
9349DECL_INLINE_THROW(uint32_t)
9350iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9351{
9352 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9353 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9354
9355 Assert(iDQword <= 1);
9356 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9357 iDQword == 0
9358 ? kIemNativeGstSimdRegLdStSz_Low128
9359 : kIemNativeGstSimdRegLdStSz_High128,
9360 kIemNativeGstRegUse_ForFullWrite);
9361
9362 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9363
9364 if (iDQword == 0)
9365 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9366 else
9367 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9368
9369 /* Free but don't flush the source register. */
9370 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9371 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9372
9373 return off;
9374}
9375
9376
9377#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9378 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9379
9380/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9381DECL_INLINE_THROW(uint32_t)
9382iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9383{
9384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9385 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9386
9387 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9388 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9389
9390 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9391
9392 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9393 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9394
9395 /* Free but don't flush the source register. */
9396 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9397 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9398
9399 return off;
9400}
9401
9402
9403#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9404 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9405
9406/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9407DECL_INLINE_THROW(uint32_t)
9408iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9409{
9410 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9411 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9412
9413 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9414 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9415
9416 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9417
9418 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9419 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9420
9421 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9422 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9423
9424 return off;
9425}
9426
9427
9428#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9429 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9430
9431/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9432DECL_INLINE_THROW(uint32_t)
9433iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9434{
9435 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9436 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9437
9438 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9439 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9440
9441 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9442
9443 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9444 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9445
9446 /* Free but don't flush the source register. */
9447 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9448 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9449
9450 return off;
9451}
9452
9453
9454#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9455 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9456
9457/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9458DECL_INLINE_THROW(uint32_t)
9459iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9460{
9461 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9462 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9463
9464 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9465 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9466
9467 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9468
9469 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9470 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9471
9472 /* Free but don't flush the source register. */
9473 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9474 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9475
9476 return off;
9477}
9478
9479
9480#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9481 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9482
9483/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9484DECL_INLINE_THROW(uint32_t)
9485iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9486{
9487 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9488 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9489
9490 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9491 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9492
9493 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9494
9495 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9496 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9497
9498 /* Free but don't flush the source register. */
9499 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9500 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9501
9502 return off;
9503}
9504
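/*
 * Note: The IEM_MC_BROADCAST_XREG_*_ZX_VLMAX emitters above replicate the value
 *       across the low 128 bits only and then explicitly zero the high 128 bits,
 *       whereas the IEM_MC_BROADCAST_YREG_*_ZX_VLMAX emitters below pass
 *       f256Bit=true and fill the entire 256-bit register.
 */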
9505
9506#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9507 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9508
9509/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9510DECL_INLINE_THROW(uint32_t)
9511iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9512{
9513 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9514 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9515
9516 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9517 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9518
9519 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9520
9521 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9522
9523 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9524 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9525
9526 return off;
9527}
9528
9529
9530#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9531 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9532
9533/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9534DECL_INLINE_THROW(uint32_t)
9535iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9536{
9537 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9538 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9539
9540 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9541 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9542
9543 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9544
9545 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9546
9547 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9548 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9549
9550 return off;
9551}
9552
9553
9554#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9555 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9556
9557/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9558DECL_INLINE_THROW(uint32_t)
9559iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9560{
9561 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9562 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9563
9564 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9565 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9566
9567 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9568
9569 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9570
9571 /* Free but don't flush the source register. */
9572 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9573 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9574
9575 return off;
9576}
9577
9578
9579#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9580 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9581
9582/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9583DECL_INLINE_THROW(uint32_t)
9584iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9585{
9586 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9587 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9588
9589 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9590 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9591
9592 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9593
9594 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9595
9596 /* Free but don't flush the source register. */
9597 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9598 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9599
9600 return off;
9601}
9602
9603
9604#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9605 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9606
9607/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9608DECL_INLINE_THROW(uint32_t)
9609iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9610{
9611 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9612 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9613
9614 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9615 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9616
9617 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9618
9619 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9620
9621 /* Free but don't flush the source register. */
9622 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9623 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9624
9625 return off;
9626}
9627
9628
9629#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9630 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9631
9632/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9633DECL_INLINE_THROW(uint32_t)
9634iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9635{
9636 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9637 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9638
9639 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9640 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9641
9642 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9643
9644 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9645 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9646
9647 /* Free but don't flush the source register. */
9648 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9649 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9650
9651 return off;
9652}
9653
9654
9655#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9656 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9657
9658/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9659DECL_INLINE_THROW(uint32_t)
9660iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9661{
9662 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9663 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9664
9665 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9666 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9667
9668 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9669
9670 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9671 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9672
9673 /* Free but don't flush the source register. */
9674 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9675 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9676
9677 return off;
9678}
9679
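/*
 * Note: Both IEM_MC_STORE_YREG_U32_ZX_VLMAX and IEM_MC_STORE_YREG_U64_ZX_VLMAX
 *       zero the full 256-bit register first and then write the scalar into
 *       element 0, so the resulting YMM register holds the value zero-extended
 *       to VLMAX (vmovd/vmovq-to-register style semantics, mentioned only as an
 *       illustration).
 */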
9680
9681#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9682 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9683
9684/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9685DECL_INLINE_THROW(uint32_t)
9686iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9687{
9688 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9689 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9690
9691 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9692 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9693 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9694 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9695 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9696
9697 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9698 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9699 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9700
9701 /* Free but don't flush the source and destination registers. */
9702 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9703 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9704 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9705
9706 return off;
9707}
9708
9709
9710#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9711 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9712
9713/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9714DECL_INLINE_THROW(uint32_t)
9715iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9716{
9717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9719
9720 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9721 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9722 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9723 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9724 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9725
9726 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9727 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9728 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9729
9730 /* Free but don't flush the source and destination registers. */
9731 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9732 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9733 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9734
9735 return off;
9736}
9737
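/*
 * Example (illustrative only): for the two merge emitters above the resulting
 * YMM destination is composed as follows, with the upper 128 bits always zeroed:
 *      IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX: qword0 = u64Local, qword1 = qword1 of iYRegSrcHx
 *      IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX: qword0 = qword0 of iYRegSrcHx, qword1 = u64Local
 */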
9738
9739#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9740 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9741
9742
9743/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9744DECL_INLINE_THROW(uint32_t)
9745iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9746{
9747 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9748 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9749
9750 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
9751 if (bImm8Mask & RT_BIT(0))
9752 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9753 if (bImm8Mask & RT_BIT(1))
9754 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9755 if (bImm8Mask & RT_BIT(2))
9756 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9757 if (bImm8Mask & RT_BIT(3))
9758 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9759
9760 /* Free but don't flush the destination register. */
9761 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9762
9763 return off;
9764}
9765
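/*
 * Example (illustrative only): a_bMask = 0x6 (bits 1 and 2 set) zeroes dwords 1
 * and 2 of the XMM register while leaving dwords 0 and 3 untouched; this is the
 * kind of zero-mask used by insertps-style element clearing (the instruction
 * mapping is an assumption, the bit-to-dword mapping follows from the code above).
 */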
9766
9767#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9768 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9769
9770
9771/** Emits code for IEM_MC_FETCH_YREG_U256. */
9772DECL_INLINE_THROW(uint32_t)
9773iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9774{
9775 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9776 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9777
9778 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9779 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9780 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9781
9782 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9783
9784 /* Free but don't flush the source register. */
9785 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9786 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9787
9788 return off;
9789}
9790
9791
9792#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9793 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9794
9795
9796/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
9797DECL_INLINE_THROW(uint32_t)
9798iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9799{
9800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9801 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9802
9803 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9804 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9805    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9806
9807 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
9808
9809 /* Free but don't flush the source register. */
9810 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9811 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9812
9813 return off;
9814}
9815
9816
9817#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
9818 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
9819
9820
9821/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
9822DECL_INLINE_THROW(uint32_t)
9823iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
9824 uint8_t idxSrcVar, uint8_t iDwSrc)
9825{
9826 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9827 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9828
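    /* Dwords 0..3 of a YMM register live in its low 128 bits and dwords 4..7 in the high 128 bits, hence
       the split on iDwDst < 4 when choosing which half to load for the update (the qword variants further
       down split on an index of 2 for the same reason). */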
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                            iDwDst < 4
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
    off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)


/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
                                      uint8_t idxSrcVar, uint8_t iQwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                            iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)


/** Emits code for IEM_MC_STORE_YREG_U64. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                            iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);

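    /* Unlike the U256 variants above, the source is a plain 64-bit variable, so it is acquired as a
       general purpose register rather than a SIMD register. */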
    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)

/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
{
    RT_NOREF(pReNative, iYReg);
    /** @todo Needs to be implemented when support for AVX-512 is added. */
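    /* Note: presumably a no-op for now because, without AVX-512 support, the recompiler only tracks the
       256-bit YMM state, leaving nothing above bit 255 to clear. */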
    return off;
}



/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_SSE_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
{
    /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));

    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
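    /* Note: the IEM_SSE_AIMPL_HIDDEN_ARGS slot(s) passed above are presumably reserved for the MXCSR value
       loaded into the first argument register below; the caller-visible arguments are asserted to start at
       the respective hidden-argument count in the IEM_MC_CALL_SSE/AVX_AIMPL_N emitters further down. */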

    /*
     * Load the MXCSR register into the first argument and mask out the current exception flags.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
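    /* Note: clearing X86_MXCSR_XCPT_FLAGS here presumably lets the helper report, via its MXCSR return
       value, exactly which exceptions this particular operation raised. */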

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);

    /*
     * The updated MXCSR is in the return register.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);

#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
#endif
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);

    return off;
}


#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}
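/* Note: a hypothetical MC block would use the above roughly as
 *          IEM_MC_CALL_SSE_AIMPL_2(pfnHypotheticalWorker, puDst, puSrc);
 *       where the function pointer and argument names are placeholders rather than actual IEM symbols; the
 *       argument variables themselves are marshalled into the call registers by iemNativeEmitCallCommon in
 *       the common worker above. */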


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}
#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */


/*********************************************************************************************************************************
*   Include instruction emitters.                                                                                                *
*********************************************************************************************************************************/
#include "target-x86/IEMAllN8veEmit-x86.h"