
source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105234

Last change on this file since 105234 was 105183, checked in by vboxsync, 7 months ago

VMM/IEM: A few new IEM MC statements required for AVX/AVX2 floating point instruction emulations, bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 487.3 KB
1/* $Id: IEMAllN8veRecompFuncs.h 105183 2024-07-08 12:26:36Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
 65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value, they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
 113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
 135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
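
/* Illustrative sketch, not lifted from an actual instruction body: the native-emitter
   macros above are meant to be used together inside an MC block, picking a native
   emitter on supported hosts and falling back to the generic path elsewhere.  The
   emitter and fallback names below are hypothetical. */
#if 0
    IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
        IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc);
    IEM_MC_NATIVE_ELSE()
        IEM_MC_CALL_VOID_AIMPL_2(pfnExampleFallback, pu64Dst, pu64Src);
    IEM_MC_NATIVE_ENDIF();
#endif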
207
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
 214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
 228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
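
/* Illustrative use (hypothetical variable and register choice): pin a local to a
   specific host register right after declaring it, e.g. ahead of emitting an AMD64
   sequence that implicitly uses RAX:
        IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(u64Tmp, X86_GREG_xAX);
   On non-AMD64 hosts the macro expands to a no-op, so the statement is harmless there. */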
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                              *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
 305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
 306 * return with a special status code and make the execution loop deal with
 307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
 308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
 309 * could continue w/o interruption, it probably will drop into the
 310 * debugger, so it is not worth the effort of trying to service it here; we
 311 * just lump it in with the handling of the others.
 312 *
 313 * To simplify the code and the register state management even more (wrt
 314 * the immediate in the AND operation), we always update the flags and skip
 315 * the extra conditional jump associated with the check.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
 326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 kIemNativeExitReason_ReturnWithFlags);
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
346
347/** Helper for iemNativeEmitFinishInstructionWithStatus. */
348DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
349{
350 unsigned const offOpcodes = pCallEntry->offOpcode;
351 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
352 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
353 {
354 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
355 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
356 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
357 }
358 AssertFailedReturn(NIL_RTGCPHYS);
359}
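
/* Worked example with made-up numbers: for a TB where aRanges[0] = { offOpcodes=0,
   cbOpcodes=16 } and aRanges[1] = { offOpcodes=16, cbOpcodes=7 }, a call entry with
   offOpcode=18 falls into range 1 (offRange = 18 - 16 = 2), so the function returns
   iemTbGetRangePhysPageAddr(pTb, 1) + 2 + pTb->aRanges[1].offPhysPage. */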
360
361
362/** The VINF_SUCCESS dummy. */
363template<int const a_rcNormal, bool const a_fIsJump>
364DECL_FORCE_INLINE_THROW(uint32_t)
365iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
366 int32_t const offJump)
367{
368 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
369 if (a_rcNormal != VINF_SUCCESS)
370 {
371#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
372 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
373#else
374 RT_NOREF_PV(pCallEntry);
375#endif
376
377 /* As this code returns from the TB any pending register writes must be flushed. */
378 off = iemNativeRegFlushPendingWrites(pReNative, off);
379
380 /*
381 * Use the lookup table for getting to the next TB quickly.
382 * Note! In this code path there can only be one entry at present.
383 */
384 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
385 PCIEMTB const pTbOrg = pReNative->pTbOrg;
386 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
387 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
388
389#if 0
390 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
391 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
392 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
393 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
394 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
395
396 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreak);
397
398#else
399 /* Load the index as argument #1 for the helper call at the given label. */
400 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
401
402 /*
 403 * Figure out the physical address of the current instruction and see
 404 * whether the next instruction we're about to execute is in the same
 405 * page, so we can optimistically skip TLB loading.
 406 *
 407 * - This is safe for all cases in FLAT mode.
 408 * - In segmented modes it is complicated, given that a negative
 409 * jump may underflow EIP and a forward jump may overflow or run into
 410 * CS.LIM and trigger a #GP. The only thing we can get away with
 411 * now at compile time is forward jumps w/o CS.LIM checks, since the
 412 * lack of CS.LIM checks means we're good for the entire physical page
 413 * we're executing on and another 15 bytes before we run into CS.LIM.
414 */
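 /* Illustrative numbers (assuming 4 KiB guest pages): with GCPhysPcCurrent = 0x10ff0,
    cbOpcode = 4 and no jump, GCPhysPcNext = 0x10ff4 stays in the same page and 16 bytes
    remain in the page, so the lookup exit that skips TLB loading can be used below;
    with GCPhysPcCurrent = 0x10ffe the remaining-bytes check fails and we fall back to
    the WithTlb exit variants at the bottom. */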
415 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
416# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
417 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
418# endif
419 )
420 {
421 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
422 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
423 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
424 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
425
426 {
427 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
428 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
429
430 /* Load the key lookup flags into the 2nd argument for the helper call.
431 - This is safe wrt CS limit checking since we're only here for FLAT modes.
432 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
433 interrupt shadow.
434 - The NMI inhibiting is more questionable, though... */
435 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
436 * Should we copy it into fExec to simplify this? OTOH, it's just a
437 * couple of extra instructions if EFLAGS are already in a register. */
438 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
439 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
440
441 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
442 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookup);
443 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithIrq);
444 }
445 }
446 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
447 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithTlb);
448 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithTlbAndIrq);
449#endif
450 }
451 return off;
452}
453
454
455#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
456 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
457 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
458
459#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
460 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
461 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
462 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
463
464/** Same as iemRegAddToRip64AndFinishingNoFlags. */
465DECL_INLINE_THROW(uint32_t)
466iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
467{
468#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
469# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
470 if (!pReNative->Core.offPc)
471 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
472# endif
473
474 /* Allocate a temporary PC register. */
475 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
476
477 /* Perform the addition and store the result. */
478 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
479 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
480
481 /* Free but don't flush the PC register. */
482 iemNativeRegFreeTmp(pReNative, idxPcReg);
483#endif
484
485#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
486 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
487
488 pReNative->Core.offPc += cbInstr;
489# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
490 off = iemNativePcAdjustCheck(pReNative, off);
491# endif
492 if (pReNative->cCondDepth)
493 off = iemNativeEmitPcWriteback(pReNative, off);
494 else
495 pReNative->Core.cInstrPcUpdateSkipped++;
496#endif
497
498 return off;
499}
500
501
502#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
503 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
504 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
505
506#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
507 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
508 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
509 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
510
511/** Same as iemRegAddToEip32AndFinishingNoFlags. */
512DECL_INLINE_THROW(uint32_t)
513iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
514{
515#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
516# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
517 if (!pReNative->Core.offPc)
518 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
519# endif
520
521 /* Allocate a temporary PC register. */
522 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
523
524 /* Perform the addition and store the result. */
525 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
526 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
527
528 /* Free but don't flush the PC register. */
529 iemNativeRegFreeTmp(pReNative, idxPcReg);
530#endif
531
532#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
533 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
534
535 pReNative->Core.offPc += cbInstr;
536# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
537 off = iemNativePcAdjustCheck(pReNative, off);
538# endif
539 if (pReNative->cCondDepth)
540 off = iemNativeEmitPcWriteback(pReNative, off);
541 else
542 pReNative->Core.cInstrPcUpdateSkipped++;
543#endif
544
545 return off;
546}
547
548
549#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
550 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
551 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
552
553#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
554 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
557
558/** Same as iemRegAddToIp16AndFinishingNoFlags. */
559DECL_INLINE_THROW(uint32_t)
560iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
561{
562#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
563# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
564 if (!pReNative->Core.offPc)
565 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
566# endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition and store the result. */
572 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
573 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
574 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
575
576 /* Free but don't flush the PC register. */
577 iemNativeRegFreeTmp(pReNative, idxPcReg);
578#endif
579
580#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
581 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
582
583 pReNative->Core.offPc += cbInstr;
584# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
585 off = iemNativePcAdjustCheck(pReNative, off);
586# endif
587 if (pReNative->cCondDepth)
588 off = iemNativeEmitPcWriteback(pReNative, off);
589 else
590 pReNative->Core.cInstrPcUpdateSkipped++;
591#endif
592
593 return off;
594}
595
596
597
598/*********************************************************************************************************************************
599* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
600*********************************************************************************************************************************/
601
602#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
603 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
604 (a_enmEffOpSize), pCallEntry->idxInstr); \
605 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
606
607#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
608 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
609 (a_enmEffOpSize), pCallEntry->idxInstr); \
610 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
611 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
612
613#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
614 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
615 IEMMODE_16BIT, pCallEntry->idxInstr); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
617
618#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
620 IEMMODE_16BIT, pCallEntry->idxInstr); \
621 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
622 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
623
624#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
625 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
626 IEMMODE_64BIT, pCallEntry->idxInstr); \
627 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
628
629#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
630 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
631 IEMMODE_64BIT, pCallEntry->idxInstr); \
632 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
633 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
634
635/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
636 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
637 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
638DECL_INLINE_THROW(uint32_t)
639iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
640 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
641{
642 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
643
644 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
645 off = iemNativeRegFlushPendingWrites(pReNative, off);
646
647#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
648 Assert(pReNative->Core.offPc == 0);
649
650 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
651#endif
652
653 /* Allocate a temporary PC register. */
654 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
655
656 /* Perform the addition. */
657 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
658
659 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
660 {
661 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
662 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
663 }
664 else
665 {
666 /* Just truncate the result to 16-bit IP. */
667 Assert(enmEffOpSize == IEMMODE_16BIT);
668 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
669 }
670 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
671
672 /* Free but don't flush the PC register. */
673 iemNativeRegFreeTmp(pReNative, idxPcReg);
674
675 return off;
676}
677
678
679#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
680 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
681 (a_enmEffOpSize), pCallEntry->idxInstr); \
682 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
683
684#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
685 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
686 (a_enmEffOpSize), pCallEntry->idxInstr); \
687 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
689
690#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
691 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
692 IEMMODE_16BIT, pCallEntry->idxInstr); \
693 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
694
695#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
696 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
697 IEMMODE_16BIT, pCallEntry->idxInstr); \
698 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
699 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
700
701#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
702 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
703 IEMMODE_32BIT, pCallEntry->idxInstr); \
704 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
705
706#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
707 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
708 IEMMODE_32BIT, pCallEntry->idxInstr); \
709 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
710 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
711
712/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
713 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
714 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
715DECL_INLINE_THROW(uint32_t)
716iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
717 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
718{
719 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
720
721 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
722 off = iemNativeRegFlushPendingWrites(pReNative, off);
723
724#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
725 Assert(pReNative->Core.offPc == 0);
726
727 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
728#endif
729
730 /* Allocate a temporary PC register. */
731 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
732
733 /* Perform the addition. */
734 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
735
736 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
737 if (enmEffOpSize == IEMMODE_16BIT)
738 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
739
740 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
741/** @todo we can skip this in 32-bit FLAT mode. */
742 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
743
744 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
745
746 /* Free but don't flush the PC register. */
747 iemNativeRegFreeTmp(pReNative, idxPcReg);
748
749 return off;
750}
751
752
753#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
754 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
755 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
756
757#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
758 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
759 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
760 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
761
762#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
763 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
764 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
765
766#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
767 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
768 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
769 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
770
771#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
772 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
773 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
774
775#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
776 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
777 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
778 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
779
780/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
781DECL_INLINE_THROW(uint32_t)
782iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
783 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
784{
785 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
786 off = iemNativeRegFlushPendingWrites(pReNative, off);
787
788#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
789 Assert(pReNative->Core.offPc == 0);
790
791 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
792#endif
793
794 /* Allocate a temporary PC register. */
795 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
796
797 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
798 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
799 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
800 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
801 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
802
803 /* Free but don't flush the PC register. */
804 iemNativeRegFreeTmp(pReNative, idxPcReg);
805
806 return off;
807}
808
809
810
811/*********************************************************************************************************************************
812* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                    *
813*********************************************************************************************************************************/
814
815/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
816#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
817 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
818
819/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
820#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
821 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
822
823/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
824#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
825 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
826
827/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
828 * clears flags. */
829#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
830 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
831 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
832
833/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
834 * clears flags. */
835#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
836 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
837 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
838
839/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
840 * clears flags. */
841#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
842 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
843 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
844
845#undef IEM_MC_SET_RIP_U16_AND_FINISH
846
847
848/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
849#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
850 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
851
852/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
853#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
854 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
855
856/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
857 * clears flags. */
858#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
859 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
860 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
861
862/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
863 * and clears flags. */
864#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
865 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
866 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
867
868#undef IEM_MC_SET_RIP_U32_AND_FINISH
869
870
871/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
872#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
873 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
874
875/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
876 * and clears flags. */
877#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
878 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
879 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
880
881#undef IEM_MC_SET_RIP_U64_AND_FINISH
882
883
884/** Same as iemRegRipJumpU16AndFinishNoFlags,
885 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
886DECL_INLINE_THROW(uint32_t)
887iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
888 uint8_t idxInstr, uint8_t cbVar)
889{
890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
892
893 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
894 off = iemNativeRegFlushPendingWrites(pReNative, off);
895
896#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
897 Assert(pReNative->Core.offPc == 0);
898
899 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
900#endif
901
902 /* Get a register with the new PC loaded from idxVarPc.
 903 Note! This ASSUMES that the high bits of the GPR are zeroed. */
904 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
905
906 /* Check limit (may #GP(0) + exit TB). */
907 if (!f64Bit)
908/** @todo we can skip this test in FLAT 32-bit mode. */
909 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
910 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
911 else if (cbVar > sizeof(uint32_t))
912 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
913
914 /* Store the result. */
915 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
916
917 iemNativeVarRegisterRelease(pReNative, idxVarPc);
 918 /** @todo implicitly free the variable? */
919
920 return off;
921}
922
923
924
925/*********************************************************************************************************************************
926* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).         *
927*********************************************************************************************************************************/
928
929/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
930 * this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
931DECL_FORCE_INLINE_THROW(uint32_t)
932iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
933{
934 /* Use16BitSp: */
935#ifdef RT_ARCH_AMD64
936 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
937 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
938#else
939 /* sub regeff, regrsp, #cbMem */
940 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
941 /* and regeff, regeff, #0xffff */
942 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
943 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
 944 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into bits 15:0 of idxRegRsp. */
945 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
946#endif
947 return off;
948}
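
/* Worked example for the 16-bit SP case (illustrative values): with RSP = 0x0000777700000002
   and cbMem = 4, SP wraps to 0xfffe, so idxRegEffSp ends up holding the zero-extended value
   0x000000000000fffe while idxRegRsp keeps its upper bits and becomes 0x000077770000fffe. */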
949
950
951DECL_FORCE_INLINE(uint32_t)
952iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
953{
954 /* Use32BitSp: */
955 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
956 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
957 return off;
958}
959
960
961DECL_INLINE_THROW(uint32_t)
962iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
963 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
964{
965 /*
966 * Assert sanity.
967 */
968#ifdef VBOX_STRICT
969 if (RT_BYTE2(cBitsVarAndFlat) != 0)
970 {
971 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
972 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
973 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
974 Assert( pfnFunction
975 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
976 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
977 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
978 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
979 : UINT64_C(0xc000b000a0009000) ));
980 }
981 else
982 Assert( pfnFunction
983 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
984 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
985 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
986 : UINT64_C(0xc000b000a0009000) ));
987#endif
988
989#ifdef VBOX_STRICT
990 /*
991 * Check that the fExec flags we've got make sense.
992 */
993 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
994#endif
995
996 /*
997 * To keep things simple we have to commit any pending writes first as we
998 * may end up making calls.
999 */
1000 /** @todo we could postpone this till we make the call and reload the
1001 * registers after returning from the call. Not sure if that's sensible or
1002 * not, though. */
1003 off = iemNativeRegFlushPendingWrites(pReNative, off);
1004
1005 /*
1006 * First we calculate the new RSP and the effective stack pointer value.
1007 * For 64-bit mode and flat 32-bit these two are the same.
1008 * (Code structure is very similar to that of PUSH)
1009 */
1010 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1011 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1012 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1013 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1014 ? cbMem : sizeof(uint16_t);
1015 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1016 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1017 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1018 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1019 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1020 if (cBitsFlat != 0)
1021 {
1022 Assert(idxRegEffSp == idxRegRsp);
1023 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1024 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1025 if (cBitsFlat == 64)
1026 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1027 else
1028 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1029 }
1030 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1031 {
1032 Assert(idxRegEffSp != idxRegRsp);
1033 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1034 kIemNativeGstRegUse_ReadOnly);
1035#ifdef RT_ARCH_AMD64
1036 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1037#else
1038 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1039#endif
1040 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1041 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1042 offFixupJumpToUseOtherBitSp = off;
1043 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1044 {
1045 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1046 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1047 }
1048 else
1049 {
1050 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1051 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1052 }
1053 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1054 }
1055 /* SpUpdateEnd: */
1056 uint32_t const offLabelSpUpdateEnd = off;
1057
1058 /*
1059 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1060 * we're skipping lookup).
1061 */
1062 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1063 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1064 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1065 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1066 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1067 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1068 : UINT32_MAX;
1069 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1070
1071
1072 if (!TlbState.fSkip)
1073 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1074 else
1075 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1076
1077 /*
1078 * Use16BitSp:
1079 */
1080 if (cBitsFlat == 0)
1081 {
1082#ifdef RT_ARCH_AMD64
1083 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1084#else
1085 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1086#endif
1087 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1088 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1089 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1090 else
1091 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1092 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1094 }
1095
1096 /*
1097 * TlbMiss:
1098 *
1099 * Call helper to do the pushing.
1100 */
1101 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1102
1103#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1104 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1105#else
1106 RT_NOREF(idxInstr);
1107#endif
1108
1109 /* Save variables in volatile registers. */
1110 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1111 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1112 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1113 | (RT_BIT_32(idxRegPc));
1114 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1115
1116 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1117 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1118 {
1119 /* Swap them using ARG0 as temp register: */
1120 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1121 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1122 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1123 }
1124 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1125 {
1126 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1127 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1128
1129 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1130 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1131 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1132 }
1133 else
1134 {
1135 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1136 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1137
1138 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1140 }
1141
1142 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1143 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1144
1145 /* Done setting up parameters, make the call. */
1146 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1147
1148 /* Restore variables and guest shadow registers to volatile registers. */
1149 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1150 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1151
1152#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1153 if (!TlbState.fSkip)
1154 {
1155 /* end of TlbMiss - Jump to the done label. */
1156 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1157 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1158
1159 /*
1160 * TlbLookup:
1161 */
1162 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1163 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1164
1165 /*
1166 * Emit code to do the actual storing / fetching.
1167 */
1168 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1169# ifdef IEM_WITH_TLB_STATISTICS
1170 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1171 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1172# endif
1173 switch (cbMemAccess)
1174 {
1175 case 2:
1176 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1177 break;
1178 case 4:
1179 if (!fIsIntelSeg)
1180 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1181 else
1182 {
1183                        /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
1184                           PUSH FS in real mode, so we have to try to emulate that here.
1185                           We borrow the now unused idxReg1 from the TLB lookup code here. */
1186 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1187 kIemNativeGstReg_EFlags);
1188 if (idxRegEfl != UINT8_MAX)
1189 {
1190#ifdef RT_ARCH_AMD64
1191 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1192 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1193 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1194#else
1195 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1196 off, TlbState.idxReg1, idxRegEfl,
1197 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1198#endif
1199 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1200 }
1201 else
1202 {
1203 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1204 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1205 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1206 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1207 }
1208 /* ASSUMES the upper half of idxRegPc is ZERO. */
1209 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1210 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1211 }
1212 break;
1213 case 8:
1214 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1215 break;
1216 default:
1217 AssertFailed();
1218 }
1219
1220 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1221 TlbState.freeRegsAndReleaseVars(pReNative);
1222
1223 /*
1224 * TlbDone:
1225 *
1226 * Commit the new RSP value.
1227 */
1228 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1229 }
1230#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1231
1232#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1233 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1234#endif
1235 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1236 if (idxRegEffSp != idxRegRsp)
1237 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1238
1239 return off;
1240}
1241
1242
1243/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1244#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1245 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1246
1247/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1248 * clears flags. */
1249#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1250 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1251 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1252
1253/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1254#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1255 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1256
1257/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1258 * clears flags. */
1259#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1260 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1261 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1262
1263#undef IEM_MC_IND_CALL_U16_AND_FINISH
1264
1265
1266/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1267#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1268 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1269
1270/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1271 * clears flags. */
1272#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1273 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1274 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1275
1276#undef IEM_MC_IND_CALL_U32_AND_FINISH
1277
1278
1279/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1280 * an extra parameter, for use in 64-bit code. */
1281#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1282 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1283
1284
1285/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1286 * an extra parameter, for use in 64-bit code and we need to check and clear
1287 * flags. */
1288#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1289 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1290 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1291
1292#undef IEM_MC_IND_CALL_U64_AND_FINISH
1293
1294/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1295 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1296DECL_INLINE_THROW(uint32_t)
1297iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1298 uint8_t idxInstr, uint8_t cbVar)
1299{
1300 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1301 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1302
1303 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1304 off = iemNativeRegFlushPendingWrites(pReNative, off);
1305
1306#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1307 Assert(pReNative->Core.offPc == 0);
1308
1309 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1310#endif
1311
1312 /* Get a register with the new PC loaded from idxVarPc.
1313 Note! This ASSUMES that the high bits of the GPR is zeroed. */
1314 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1315
1316 /* Check limit (may #GP(0) + exit TB). */
1317 if (!f64Bit)
1318/** @todo we can skip this test in FLAT 32-bit mode. */
1319 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1320 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1321 else if (cbVar > sizeof(uint32_t))
1322 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1323
1324#if 1
1325 /* Allocate a temporary PC register, we don't want it shadowed. */
1326 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1327 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1328#else
1329 /* Allocate a temporary PC register. */
1330 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1331 true /*fNoVolatileRegs*/);
1332#endif
1333
1334    /* Calculate the return address (current PC + cbInstr) and push it to the guest stack. */
1335 /** @todo Flat variants for PC32 variants. */
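    /* Note: judging from the call sites below (and elsewhere in this file), the packed
             RT_MAKE_U32_FROM_U8 value handed to iemNativeEmitStackPushRip appears to encode the
             push width in bits in the first byte and the flat address width (0 = not flat) in the
             second byte; this is an observation from usage, not a documented contract. */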
1336 switch (cbVar)
1337 {
1338 case sizeof(uint16_t):
1339 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1340 /* Truncate the result to 16-bit IP. */
1341 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1342 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1343 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1344 break;
1345 case sizeof(uint32_t):
1346 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1347 /** @todo In FLAT mode we can use the flat variant. */
1348 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1349 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1350 break;
1351 case sizeof(uint64_t):
1352 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1353 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1354 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1355 break;
1356 default:
1357 AssertFailed();
1358 }
1359
1360 /* RSP got changed, so do this again. */
1361 off = iemNativeRegFlushPendingWrites(pReNative, off);
1362
1363 /* Store the result. */
1364 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1365
1366#if 1
1367 /* Need to transfer the shadow information to the new RIP register. */
1368 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1369#else
1370 /* Sync the new PC. */
1371 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1372#endif
1373 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1374 iemNativeRegFreeTmp(pReNative, idxPcReg);
1375    /** @todo implicitly free the variable? */
1376
1377 return off;
1378}
1379
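/*
 * Illustrative sketch (not part of the recompiler) of the guest-visible semantics the code
 * emitted by iemNativeEmitRipIndirectCallNoFlags implements, shown for the 64-bit case:
 *
 *     uint64_t const uNewRip  = uVarPc;                            // target taken from the variable
 *     // ... #GP(0) + TB exit if uNewRip is not canonical (16/32-bit: if outside the CS limit) ...
 *     uint64_t const uRetAddr = pVCpu->cpum.GstCtx.rip + cbInstr;  // return address
 *     // ... push uRetAddr onto the guest stack (may also fault) ...
 *     pVCpu->cpum.GstCtx.rip  = uNewRip;
 */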
1380
1381/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1382 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1383#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1384 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1385
1386/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1387 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1388 * flags. */
1389#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1390 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1391 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1392
1393/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1394 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1395#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1396 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1397
1398/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1399 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1400 * flags. */
1401#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1402 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1403 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1404
1405/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1406 * an extra parameter, for use in 64-bit code. */
1407#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1408 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1409
1410/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1411 * an extra parameter, for use in 64-bit code and we need to check and clear
1412 * flags. */
1413#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1414 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1415 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1416
1417#undef IEM_MC_REL_CALL_S16_AND_FINISH
1418
1419/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1420 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1421DECL_INLINE_THROW(uint32_t)
1422iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1423 uint8_t idxInstr)
1424{
1425 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1426 off = iemNativeRegFlushPendingWrites(pReNative, off);
1427
1428#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1429 Assert(pReNative->Core.offPc == 0);
1430
1431 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1432#endif
1433
1434 /* Allocate a temporary PC register. */
1435 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1436 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1437 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1438
1439 /* Calculate the new RIP. */
1440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1441 /* Truncate the result to 16-bit IP. */
1442 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1443 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1444 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1445
1446 /* Truncate the result to 16-bit IP. */
1447 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1448
1449 /* Check limit (may #GP(0) + exit TB). */
1450 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1451
1452    /* Push the return address (the updated IP computed above) to the guest stack. */
1453 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1454 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1455
1456 /* RSP got changed, so flush again. */
1457 off = iemNativeRegFlushPendingWrites(pReNative, off);
1458
1459 /* Store the result. */
1460 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1461
1462 /* Need to transfer the shadow information to the new RIP register. */
1463 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1464 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1465 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1466
1467 return off;
1468}
1469
1470
1471/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1472 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1473#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1474 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1475
1476/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1477 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1478 * flags. */
1479#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1480 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1481 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1482
1483#undef IEM_MC_REL_CALL_S32_AND_FINISH
1484
1485/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1486 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1487DECL_INLINE_THROW(uint32_t)
1488iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1489 uint8_t idxInstr)
1490{
1491 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1492 off = iemNativeRegFlushPendingWrites(pReNative, off);
1493
1494#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1495 Assert(pReNative->Core.offPc == 0);
1496
1497 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1498#endif
1499
1500 /* Allocate a temporary PC register. */
1501 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1502 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1503 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1504
1505 /* Update the EIP to get the return address. */
1506 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1507
1508 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1509 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1510 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1511 /** @todo we can skip this test in FLAT 32-bit mode. */
1512 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1513
1514    /* Push the return address to the guest stack. */
1515 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1516 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1517 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1518
1519 /* RSP got changed, so do this again. */
1520 off = iemNativeRegFlushPendingWrites(pReNative, off);
1521
1522 /* Store the result. */
1523 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1524
1525 /* Need to transfer the shadow information to the new RIP register. */
1526 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1527 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1528 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1529
1530 return off;
1531}
1532
1533
1534/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1535 * an extra parameter, for use in 64-bit code. */
1536#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1537 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1538
1539/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1540 * an extra parameter, for use in 64-bit code and we need to check and clear
1541 * flags. */
1542#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1543 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1544 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1545
1546#undef IEM_MC_REL_CALL_S64_AND_FINISH
1547
1548/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1549 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1550DECL_INLINE_THROW(uint32_t)
1551iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1552 uint8_t idxInstr)
1553{
1554 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1555 off = iemNativeRegFlushPendingWrites(pReNative, off);
1556
1557#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1558 Assert(pReNative->Core.offPc == 0);
1559
1560 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1561#endif
1562
1563 /* Allocate a temporary PC register. */
1564 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1565 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1566 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1567
1568 /* Update the RIP to get the return address. */
1569 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1570
1571 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1572 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1573 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1574 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1575
1576    /* Push the return address to the guest stack. */
1577 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1578 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1579
1580 /* RSP got changed, so do this again. */
1581 off = iemNativeRegFlushPendingWrites(pReNative, off);
1582
1583 /* Store the result. */
1584 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1585
1586 /* Need to transfer the shadow information to the new RIP register. */
1587 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1588 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1589 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1590
1591 return off;
1592}
1593
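/*
 * Illustrative sketch (not part of the recompiler) of what the code emitted by
 * iemNativeEmitRip64RelativeCallNoFlags above does for a 64-bit relative CALL:
 *
 *     uint64_t const uRetAddr = pVCpu->cpum.GstCtx.rip + cbInstr;  // return address
 *     uint64_t const uNewRip  = uRetAddr + offDisp;                // branch target
 *     // ... #GP(0) + TB exit if uNewRip is not canonical ...
 *     // ... push uRetAddr onto the guest stack (RSP -= 8; may also fault) ...
 *     pVCpu->cpum.GstCtx.rip = uNewRip;
 */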
1594
1595/*********************************************************************************************************************************
1596* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).            *
1597*********************************************************************************************************************************/
1598
1599DECL_FORCE_INLINE_THROW(uint32_t)
1600iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1601 uint16_t cbPopAdd, uint8_t idxRegTmp)
1602{
1603 /* Use16BitSp: */
1604#ifdef RT_ARCH_AMD64
1605 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1606 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1607 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1608 RT_NOREF(idxRegTmp);
1609#elif defined(RT_ARCH_ARM64)
1610 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1611 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1612    /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1613 uint16_t const cbCombined = cbMem + cbPopAdd;
1614 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1615 if (cbCombined >= RT_BIT_32(12))
1616 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1617 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1618 /* and tmp, tmp, #0xffff */
1619 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1620 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1621    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1622 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1623#else
1624# error "Port me"
1625#endif
1626 return off;
1627}
1628
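/*
 * In short, for a 16-bit stack segment the helper above yields: the effective stack pointer is
 * taken from the 16-bit SP, and SP is advanced by cbMem + cbPopAdd with only bits 15:0 of RSP
 * being updated.
 */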
1629
1630DECL_FORCE_INLINE_THROW(uint32_t)
1631iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1632 uint16_t cbPopAdd)
1633{
1634 /* Use32BitSp: */
1635 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1636 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1637 return off;
1638}
1639
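/*
 * In short, for a 32-bit stack segment the helper above yields: EffSp = ESP and
 * ESP += cbMem + cbPopAdd (32-bit arithmetic).
 */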
1640
1641/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1642#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1643 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1644
1645/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1646#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1647 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1648
1649/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1650#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1651 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1652
1653/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1654 * clears flags. */
1655#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1656 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1657 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1658
1659/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1660 * clears flags. */
1661#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1662 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1663 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1664
1665/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1666 * clears flags. */
1667#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1668 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1669 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1670
1671/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1672DECL_INLINE_THROW(uint32_t)
1673iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1674 IEMMODE enmEffOpSize, uint8_t idxInstr)
1675{
1676 RT_NOREF(cbInstr);
1677
1678#ifdef VBOX_STRICT
1679 /*
1680 * Check that the fExec flags we've got make sense.
1681 */
1682 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1683#endif
1684
1685 /*
1686 * To keep things simple we have to commit any pending writes first as we
1687 * may end up making calls.
1688 */
1689 off = iemNativeRegFlushPendingWrites(pReNative, off);
1690
1691 /*
1692 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1693 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1694 * directly as the effective stack pointer.
1695 * (Code structure is very similar to that of PUSH)
1696 *
1697 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1698 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1699 * aren't commonly used (or useful) and thus not in need of optimizing.
1700 *
1701     * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation
1702 * as the shadowed register would remain modified even if the return address throws a \#GP(0)
1703 * due to being outside the CS limit causing a wrong stack pointer value in the guest (see
1704     * the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transferred
1705 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1706 */
1707 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1708 ? sizeof(uint64_t)
1709 : enmEffOpSize == IEMMODE_32BIT
1710 ? sizeof(uint32_t)
1711 : sizeof(uint16_t);
1712 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1713 uintptr_t const pfnFunction = fFlat
1714 ? enmEffOpSize == IEMMODE_64BIT
1715 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1716 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1717 : enmEffOpSize == IEMMODE_32BIT
1718 ? (uintptr_t)iemNativeHlpStackFetchU32
1719 : (uintptr_t)iemNativeHlpStackFetchU16;
1720 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1721 fFlat ? kIemNativeGstRegUse_ForUpdate : kIemNativeGstRegUse_Calculation,
1722 true /*fNoVolatileRegs*/);
1723 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1724 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1725 * will be the resulting register value. */
1726 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1727
1728 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1729 if (fFlat)
1730 Assert(idxRegEffSp == idxRegRsp);
1731 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1732 {
1733 Assert(idxRegEffSp != idxRegRsp);
1734 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1735 kIemNativeGstRegUse_ReadOnly);
1736#ifdef RT_ARCH_AMD64
1737 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1738#else
1739 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1740#endif
1741 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1742 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1743 offFixupJumpToUseOtherBitSp = off;
1744 if (enmEffOpSize == IEMMODE_32BIT)
1745 {
1746 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1747 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1748 }
1749 else
1750 {
1751 Assert(enmEffOpSize == IEMMODE_16BIT);
1752 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1753 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1754 idxRegMemResult);
1755 }
1756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1757 }
1758 /* SpUpdateEnd: */
1759 uint32_t const offLabelSpUpdateEnd = off;
1760
1761 /*
1762 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1763 * we're skipping lookup).
1764 */
1765 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1766 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1767 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1768 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1769 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1770 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1771 : UINT32_MAX;
1772
1773 if (!TlbState.fSkip)
1774 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1775 else
1776 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1777
1778 /*
1779 * Use16BitSp:
1780 */
1781 if (!fFlat)
1782 {
1783#ifdef RT_ARCH_AMD64
1784 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1785#else
1786 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1787#endif
1788 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1789 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1790 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1791 idxRegMemResult);
1792 else
1793 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1794 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1795 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1796 }
1797
1798 /*
1799 * TlbMiss:
1800 *
1801     * Call helper to do the popping (stack fetch).
1802 */
1803 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1804
1805#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1806 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1807#else
1808 RT_NOREF(idxInstr);
1809#endif
1810
1811 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1812 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1813 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1814 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1815
1816
1817 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1818 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1819 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1820
1821 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1822 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1823
1824 /* Done setting up parameters, make the call. */
1825 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1826
1827 /* Move the return register content to idxRegMemResult. */
1828 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1829 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1830
1831 /* Restore variables and guest shadow registers to volatile registers. */
1832 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1833 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1834
1835#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1836 if (!TlbState.fSkip)
1837 {
1838 /* end of TlbMiss - Jump to the done label. */
1839 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1840 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1841
1842 /*
1843 * TlbLookup:
1844 */
1845 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1846 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1847
1848 /*
1849         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1850 */
1851 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1852# ifdef IEM_WITH_TLB_STATISTICS
1853 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1854 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1855# endif
1856 switch (cbMem)
1857 {
1858 case 2:
1859 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1860 break;
1861 case 4:
1862 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1863 break;
1864 case 8:
1865 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1866 break;
1867 default:
1868 AssertFailed();
1869 }
1870
1871 TlbState.freeRegsAndReleaseVars(pReNative);
1872
1873 /*
1874 * TlbDone:
1875 *
1876 * Set the new RSP value (FLAT accesses needs to calculate it first) and
1877 * commit the popped register value.
1878 */
1879 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1880 }
1881#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1882
1883 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1884 if (!f64Bit)
1885/** @todo we can skip this test in FLAT 32-bit mode. */
1886 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1887 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1888 else if (enmEffOpSize == IEMMODE_64BIT)
1889 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1890
1891 /* Complete RSP calculation for FLAT mode. */
1892 if (idxRegEffSp == idxRegRsp)
1893 {
1894 if (enmEffOpSize == IEMMODE_64BIT)
1895 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1896 else
1897 {
1898 Assert(enmEffOpSize == IEMMODE_32BIT);
1899 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1900 }
1901 }
1902
1903 /* Commit the result and clear any current guest shadows for RIP. */
1904 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1905 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1906 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1907
1908 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1909 if (!fFlat)
1910 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1911
1912 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1913 if (idxRegEffSp != idxRegRsp)
1914 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1915 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1916 return off;
1917}
1918
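/*
 * Illustrative sketch (not part of the recompiler) of the guest-visible semantics the code
 * emitted by iemNativeEmitRetn implements, using the flat 64-bit case for brevity:
 *
 *     uint64_t const uNewRip = <the 8 bytes read at pVCpu->cpum.GstCtx.rsp>;  // pop return address
 *     // ... #GP(0) + TB exit if uNewRip is not canonical (16/32-bit: if outside the CS limit) ...
 *     pVCpu->cpum.GstCtx.rsp += sizeof(uint64_t) + cbPop;                     // cbPop = RETN imm16
 *     pVCpu->cpum.GstCtx.rip  = uNewRip;
 */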
1919
1920/*********************************************************************************************************************************
1921* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1922*********************************************************************************************************************************/
1923
1924#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1925 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1926
1927/**
1928 * Emits code to check if a \#NM exception should be raised.
1929 *
1930 * @returns New code buffer offset, UINT32_MAX on failure.
1931 * @param pReNative The native recompile state.
1932 * @param off The code buffer offset.
1933 * @param idxInstr The current instruction.
1934 */
1935DECL_INLINE_THROW(uint32_t)
1936iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1937{
1938#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1939 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1940
1941 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1942 {
1943#endif
1944 /*
1945 * Make sure we don't have any outstanding guest register writes as we may
1946         * raise an #NM and all guest registers must be up to date in CPUMCTX.
1947 */
1948 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1949 off = iemNativeRegFlushPendingWrites(pReNative, off);
1950
1951#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1952 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1953#else
1954 RT_NOREF(idxInstr);
1955#endif
1956
1957 /* Allocate a temporary CR0 register. */
1958 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
1959
1960 /*
1961         * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
1962 * return raisexcpt();
1963 */
1964 /* Test and jump. */
1965 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, kIemNativeExitReason_RaiseNm);
1966
1967 /* Free but don't flush the CR0 register. */
1968 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1969
1970#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1971 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1972 }
1973 else
1974 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1975#endif
1976
1977 return off;
1978}
1979
1980
1981#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1982 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1983
1984/**
1985 * Emits code to check if a \#NM exception should be raised.
1986 *
1987 * @returns New code buffer offset, UINT32_MAX on failure.
1988 * @param pReNative The native recompile state.
1989 * @param off The code buffer offset.
1990 * @param idxInstr The current instruction.
1991 */
1992DECL_INLINE_THROW(uint32_t)
1993iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1994{
1995#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1996 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
1997
1998 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
1999 {
2000#endif
2001 /*
2002 * Make sure we don't have any outstanding guest register writes as we may
2003         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2004 */
2005 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2006 off = iemNativeRegFlushPendingWrites(pReNative, off);
2007
2008#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2009 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2010#else
2011 RT_NOREF(idxInstr);
2012#endif
2013
2014 /* Allocate a temporary CR0 register. */
2015 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_Calculation);
2016
2017 /*
2018         * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2019 * return raisexcpt();
2020 */
2021 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2022 /* Test and jump. */
2023 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS, kIemNativeExitReason_RaiseNm);
2024
2025 /* Free the CR0 register. */
2026 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2027
2028#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2029 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2030 }
2031 else
2032 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2033#endif
2034
2035 return off;
2036}
2037
2038
2039#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2040 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2041
2042/**
2043 * Emits code to check if a \#MF exception should be raised.
2044 *
2045 * @returns New code buffer offset, UINT32_MAX on failure.
2046 * @param pReNative The native recompile state.
2047 * @param off The code buffer offset.
2048 * @param idxInstr The current instruction.
2049 */
2050DECL_INLINE_THROW(uint32_t)
2051iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2052{
2053 /*
2054 * Make sure we don't have any outstanding guest register writes as we may
2055     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2056 */
2057 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2058 off = iemNativeRegFlushPendingWrites(pReNative, off);
2059
2060#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2061 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2062#else
2063 RT_NOREF(idxInstr);
2064#endif
2065
2066 /* Allocate a temporary FSW register. */
2067 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
2068
2069 /*
2070     * if ((FSW & X86_FSW_ES) != 0)
2071 * return raisexcpt();
2072 */
2073 /* Test and jump. */
2074 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeExitReason_RaiseMf);
2075
2076 /* Free but don't flush the FSW register. */
2077 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2078
2079 return off;
2080}
2081
2082
2083#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2084 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2085
2086/**
2087 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2088 *
2089 * @returns New code buffer offset, UINT32_MAX on failure.
2090 * @param pReNative The native recompile state.
2091 * @param off The code buffer offset.
2092 * @param idxInstr The current instruction.
2093 */
2094DECL_INLINE_THROW(uint32_t)
2095iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2096{
2097#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2098 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2099
2100 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2101 {
2102#endif
2103 /*
2104 * Make sure we don't have any outstanding guest register writes as we may
2105         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2106 */
2107 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2108 off = iemNativeRegFlushPendingWrites(pReNative, off);
2109
2110#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2111 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2112#else
2113 RT_NOREF(idxInstr);
2114#endif
2115
2116 /* Allocate a temporary CR0 and CR4 register. */
2117 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2118 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2119 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2120
2121 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2122#ifdef RT_ARCH_AMD64
2123 /*
2124 * We do a modified test here:
2125 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2126 * else { goto RaiseSseRelated; }
2127 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2128          * all targets except the 386, which doesn't support SSE, so this should
2129          * be a safe assumption.
2130 */
2131 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2132 //pCodeBuf[off++] = 0xcc;
2133 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2134 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2135 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2136 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2137 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2138 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeExitReason_RaiseSseRelated, kIemNativeInstrCond_ne);
2139
2140#elif defined(RT_ARCH_ARM64)
2141 /*
2142 * We do a modified test here:
2143 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2144 * else { goto RaiseSseRelated; }
2145 */
2146 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2147 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2148 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2149 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2150 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2151 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2152 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2153 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2154 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2155 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2156 kIemNativeExitReason_RaiseSseRelated);
2157
2158#else
2159# error "Port me!"
2160#endif
2161
2162 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2163 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2164 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2165 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2166
2167#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2168 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2169 }
2170 else
2171 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2172#endif
2173
2174 return off;
2175}
2176
2177
2178#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2179 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2180
2181/**
2182 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2183 *
2184 * @returns New code buffer offset, UINT32_MAX on failure.
2185 * @param pReNative The native recompile state.
2186 * @param off The code buffer offset.
2187 * @param idxInstr The current instruction.
2188 */
2189DECL_INLINE_THROW(uint32_t)
2190iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2191{
2192#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2193 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2194
2195 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2196 {
2197#endif
2198 /*
2199 * Make sure we don't have any outstanding guest register writes as we may
2200         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2201 */
2202 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2203 off = iemNativeRegFlushPendingWrites(pReNative, off);
2204
2205#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2206 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2207#else
2208 RT_NOREF(idxInstr);
2209#endif
2210
2211 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2212 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2213 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2214 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2215 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2216
2217 /*
2218 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2219 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2220 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2221 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2222 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2223 * { likely }
2224 * else { goto RaiseAvxRelated; }
2225 */
2226#ifdef RT_ARCH_AMD64
2227 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2228 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2229 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2230 ^ 0x1a) ) { likely }
2231 else { goto RaiseAvxRelated; } */
2232 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2233 //pCodeBuf[off++] = 0xcc;
2234 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2235 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2236 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2237 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2238 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2239 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2240 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2241 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2242 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2243 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2244 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeExitReason_RaiseAvxRelated, kIemNativeInstrCond_ne);
2245
2246#elif defined(RT_ARCH_ARM64)
2247 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2248 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2249 else { goto RaiseAvxRelated; } */
2250 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2251 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2252 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2253 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2254 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2255 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2256 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2257 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2258 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2259 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2260 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2261 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2262 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2263 kIemNativeExitReason_RaiseAvxRelated);
2264
2265#else
2266# error "Port me!"
2267#endif
2268
2269 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2270 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2271 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2272 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2273#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2274 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2275 }
2276 else
2277 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2278#endif
2279
2280 return off;
2281}
2282
2283
2284#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2285#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
2286 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
2287
2288/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
2289DECL_INLINE_THROW(uint32_t)
2290iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2291{
2292 /*
2293 * Make sure we don't have any outstanding guest register writes as we may
2294     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
2295 */
2296 off = iemNativeRegFlushPendingWrites(pReNative, off);
2297
2298#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2299 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2300#else
2301 RT_NOREF(idxInstr);
2302#endif
2303
2304 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
2305 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2306
2307 /* mov tmp, varmxcsr */
2308 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2309 /* tmp &= X86_MXCSR_XCPT_MASK */
2310 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
2311 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
2312 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
2313 /* tmp = ~tmp */
2314 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
2315 /* tmp &= mxcsr */
2316 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2317 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
2318 kIemNativeExitReason_RaiseSseAvxFpRelated);
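    /* I.e. we exit to raise the exception if any MXCSR exception flag is set whose corresponding
       exception mask bit is clear (an unmasked pending SIMD FP exception). */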
2319
2320 /* Free but don't flush the MXCSR register. */
2321 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
2322 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2323
2324 return off;
2325}
2326#endif
2327
2328
2329#define IEM_MC_RAISE_DIVIDE_ERROR() \
2330 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2331
2332/**
2333 * Emits code to raise a \#DE.
2334 *
2335 * @returns New code buffer offset, UINT32_MAX on failure.
2336 * @param pReNative The native recompile state.
2337 * @param off The code buffer offset.
2338 * @param idxInstr The current instruction.
2339 */
2340DECL_INLINE_THROW(uint32_t)
2341iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2342{
2343 /*
2344     * Make sure we don't have any outstanding guest register writes as we may raise a #DE and all guest registers must be up to date in CPUMCTX.
2345 */
2346 off = iemNativeRegFlushPendingWrites(pReNative, off);
2347
2348#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2349 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2350#else
2351 RT_NOREF(idxInstr);
2352#endif
2353
2354 /* raise \#DE exception unconditionally. */
2355 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_RaiseDe);
2356}
2357
2358
2359#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2360 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2361
2362/**
2363 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2364 *
2365 * @returns New code buffer offset, UINT32_MAX on failure.
2366 * @param pReNative The native recompile state.
2367 * @param off The code buffer offset.
2368 * @param idxInstr The current instruction.
2369 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2370 * @param cbAlign The alignment in bytes to check against.
2371 */
2372DECL_INLINE_THROW(uint32_t)
2373iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
2374{
2375 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2376 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2377
2378 /*
2379 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2380 */
2381 off = iemNativeRegFlushPendingWrites(pReNative, off);
2382
2383#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2384 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2385#else
2386 RT_NOREF(idxInstr);
2387#endif
2388
2389 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2390
2391 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2392 kIemNativeExitReason_RaiseGp0);
2393
2394 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2395 return off;
2396}
2397
2398
2399/*********************************************************************************************************************************
2400* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2401*********************************************************************************************************************************/
2402
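/*
 * Editorial note / usage sketch (not generated code): the emitters below back the
 * IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF macros defined further down, which the
 * microcode blocks are assumed to use roughly like this (names illustrative only):
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ... statements emitted for the ZF-set case ...
 *      } IEM_MC_ELSE() {
 *          ... statements emitted for the ZF-clear case ...
 *      } IEM_MC_ENDIF();
 *
 * Each IF pushes an entry with 'else' and 'endif' labels onto pReNative->aCondStack,
 * ELSE restores the register/variable snapshot taken at the start of the if-block,
 * and ENDIF merges the two branch states, dropping whatever differs between them.
 */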
2403/**
2404 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2405 *
2406 * @returns Pointer to the condition stack entry on success; throws
2407 *          VERR_IEM_COND_TOO_DEEPLY_NESTED (longjmp) if the nesting is too deep.
2408 */
2409DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2410{
2411#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2412 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2413#endif
2414
2415 uint32_t const idxStack = pReNative->cCondDepth;
2416 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2417
2418 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2419 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2420
2421 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2422 pEntry->fInElse = false;
2423 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2424 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2425
2426 return pEntry;
2427}
2428
2429
2430/**
2431 * Start of the if-block, snapshotting the register and variable state.
2432 */
2433DECL_INLINE_THROW(void)
2434iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2435{
2436 Assert(offIfBlock != UINT32_MAX);
2437 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2438 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2439 Assert(!pEntry->fInElse);
2440
2441    /* Define the start of the IF block if requested or for disassembly purposes. */
2442 if (idxLabelIf != UINT32_MAX)
2443 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2444#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2445 else
2446 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2447#else
2448 RT_NOREF(offIfBlock);
2449#endif
2450
2451#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2452 Assert(pReNative->Core.offPc == 0);
2453#endif
2454
2455 /* Copy the initial state so we can restore it in the 'else' block. */
2456 pEntry->InitialState = pReNative->Core;
2457}
2458
2459
2460#define IEM_MC_ELSE() } while (0); \
2461 off = iemNativeEmitElse(pReNative, off); \
2462 do {
2463
2464/** Emits code related to IEM_MC_ELSE. */
2465DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2466{
2467 /* Check sanity and get the conditional stack entry. */
2468 Assert(off != UINT32_MAX);
2469 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2470 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2471 Assert(!pEntry->fInElse);
2472
2473#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2474 /* Writeback any dirty shadow registers. */
2475 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2476 * in one of the branches and leave guest registers already dirty before the start of the if
2477 * block alone. */
2478 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2479#endif
2480
2481 /* Jump to the endif */
2482 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2483
2484 /* Define the else label and enter the else part of the condition. */
2485 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2486 pEntry->fInElse = true;
2487
2488#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2489 Assert(pReNative->Core.offPc == 0);
2490#endif
2491
2492 /* Snapshot the core state so we can do a merge at the endif and restore
2493 the snapshot we took at the start of the if-block. */
2494 pEntry->IfFinalState = pReNative->Core;
2495 pReNative->Core = pEntry->InitialState;
2496
2497 return off;
2498}
2499
2500
2501#define IEM_MC_ENDIF() } while (0); \
2502 off = iemNativeEmitEndIf(pReNative, off)
2503
2504/** Emits code related to IEM_MC_ENDIF. */
2505DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2506{
2507 /* Check sanity and get the conditional stack entry. */
2508 Assert(off != UINT32_MAX);
2509 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2510 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2511
2512#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2513 Assert(pReNative->Core.offPc == 0);
2514#endif
2515#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2516 /* Writeback any dirty shadow registers (else branch). */
2517 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2518 * in one of the branches and leave guest registers already dirty before the start of the if
2519 * block alone. */
2520 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2521#endif
2522
2523 /*
2524     * Now we have to find common ground between the current core state and the
2525     * one at the end of the other branch.  Use the smallest common denominator
2526     * and just drop anything that isn't the same in both states.
2527 */
2528 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2529 * which is why we're doing this at the end of the else-block.
2530     *        But we'd need more info about the future for that to be worth the effort. */
2531 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2532#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2533 Assert( pOther->bmGstRegShadowDirty == 0
2534 && pReNative->Core.bmGstRegShadowDirty == 0);
2535#endif
2536
2537 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2538 {
2539        /* Shadowed guest registers first: drop any shadowing that the other branch
                doesn't have in the very same host register. */
2540 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2541 if (fGstRegs)
2542 {
2543 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2544 do
2545 {
2546 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2547 fGstRegs &= ~RT_BIT_64(idxGstReg);
2548
2549 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2550 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2551 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2552 {
2553 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2554 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2555
2556#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2557 /* Writeback any dirty shadow registers we are about to unshadow. */
2558 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2559#endif
2560 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2561 }
2562 } while (fGstRegs);
2563 }
2564 else
2565 {
2566 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2567#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2568 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2569#endif
2570 }
2571
2572 /* Check variables next. For now we must require them to be identical
2573 or stuff we can recreate. */
2574 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2575 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2576 if (fVars)
2577 {
2578 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2579 do
2580 {
2581 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2582 fVars &= ~RT_BIT_32(idxVar);
2583
2584 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2585 {
2586 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2587 continue;
2588 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2589 {
2590 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2591 if (idxHstReg != UINT8_MAX)
2592 {
2593 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2594 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2595 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2596 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2597 }
2598 continue;
2599 }
2600 }
2601 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2602 continue;
2603
2604 /* Irreconcilable, so drop it. */
2605 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2606 if (idxHstReg != UINT8_MAX)
2607 {
2608 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2609 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2610 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2611 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2612 }
2613 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2614 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2615 } while (fVars);
2616 }
2617
2618        /* Finally, check that the host register allocations match. */
2619 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2620 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2621 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2622 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2623 }
2624
2625 /*
2626 * Define the endif label and maybe the else one if we're still in the 'if' part.
2627 */
2628 if (!pEntry->fInElse)
2629 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2630 else
2631 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2632    /* Pop the conditional stack. */
2633
2634 /* Pop the conditional stack.*/
2635 pReNative->cCondDepth -= 1;
2636
2637 return off;
2638}
2639
2640
2641#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2642 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2643 do {
2644
2645/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2646DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2647{
2648 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2649 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2650
2651 /* Get the eflags. */
2652 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2653 kIemNativeGstRegUse_ReadOnly);
2654
2655 /* Test and jump. */
2656 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2657
2658 /* Free but don't flush the EFlags register. */
2659 iemNativeRegFreeTmp(pReNative, idxEflReg);
2660
2661 /* Make a copy of the core state now as we start the if-block. */
2662 iemNativeCondStartIfBlock(pReNative, off);
2663
2664 return off;
2665}
2666
2667
2668#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2669 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2670 do {
2671
2672/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2673DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2674{
2675 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2676 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2677
2678 /* Get the eflags. */
2679 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2680 kIemNativeGstRegUse_ReadOnly);
2681
2682 /* Test and jump. */
2683 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2684
2685 /* Free but don't flush the EFlags register. */
2686 iemNativeRegFreeTmp(pReNative, idxEflReg);
2687
2688 /* Make a copy of the core state now as we start the if-block. */
2689 iemNativeCondStartIfBlock(pReNative, off);
2690
2691 return off;
2692}
2693
2694
2695#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2696 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2697 do {
2698
2699/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2700DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2701{
2702 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2703 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2704
2705 /* Get the eflags. */
2706 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2707 kIemNativeGstRegUse_ReadOnly);
2708
2709 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2710 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2711
2712 /* Test and jump. */
2713 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2714
2715 /* Free but don't flush the EFlags register. */
2716 iemNativeRegFreeTmp(pReNative, idxEflReg);
2717
2718 /* Make a copy of the core state now as we start the if-block. */
2719 iemNativeCondStartIfBlock(pReNative, off);
2720
2721 return off;
2722}
2723
2724
2725#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2726 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2727 do {
2728
2729/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2730DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2731{
2732 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2733 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2734
2735 /* Get the eflags. */
2736 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2737 kIemNativeGstRegUse_ReadOnly);
2738
2739 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2740 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2741
2742 /* Test and jump. */
2743 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2744
2745 /* Free but don't flush the EFlags register. */
2746 iemNativeRegFreeTmp(pReNative, idxEflReg);
2747
2748 /* Make a copy of the core state now as we start the if-block. */
2749 iemNativeCondStartIfBlock(pReNative, off);
2750
2751 return off;
2752}
2753
2754
2755#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2756 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2757 do {
2758
2759#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2760 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2761 do {
2762
2763/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2764DECL_INLINE_THROW(uint32_t)
2765iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2766 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2767{
2768 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2769 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2770
2771 /* Get the eflags. */
2772 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2773 kIemNativeGstRegUse_ReadOnly);
2774
2775 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2776 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2777
2778 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2779 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2780 Assert(iBitNo1 != iBitNo2);
2781
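    /*
     * Strategy for both code paths below: isolate bit #1, shift it to the position
     * of bit #2 and XOR the result with EFLAGS; bit #2 of the temporary register
     * then ends up set exactly when the two EFLAGS bits differ.
     */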
2782#ifdef RT_ARCH_AMD64
2783 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2784
2785 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2786 if (iBitNo1 > iBitNo2)
2787 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2788 else
2789 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2790 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2791
2792#elif defined(RT_ARCH_ARM64)
2793 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2794 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2795
2796 /* and tmpreg, eflreg, #1<<iBitNo1 */
2797 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2798
2799    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2800 if (iBitNo1 > iBitNo2)
2801 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2802 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2803 else
2804 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2805 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2806
2807 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2808
2809#else
2810# error "Port me"
2811#endif
2812
2813 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2814 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2815 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2816
2817 /* Free but don't flush the EFlags and tmp registers. */
2818 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2819 iemNativeRegFreeTmp(pReNative, idxEflReg);
2820
2821 /* Make a copy of the core state now as we start the if-block. */
2822 iemNativeCondStartIfBlock(pReNative, off);
2823
2824 return off;
2825}
2826
2827
2828#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2829 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2830 do {
2831
2832#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2833 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2834 do {
2835
2836/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2837 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2838DECL_INLINE_THROW(uint32_t)
2839iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2840 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2841{
2842 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2843 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2844
2845    /* We need an if-block label for the inverted variant, so we can skip the two-bit check once the lone bit is found set. */
2846 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2847 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2848
2849 /* Get the eflags. */
2850 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2851 kIemNativeGstRegUse_ReadOnly);
2852
2853 /* Translate the flag masks to bit numbers. */
2854 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2855 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2856
2857 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2858 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2859 Assert(iBitNo1 != iBitNo);
2860
2861 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2862 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2863 Assert(iBitNo2 != iBitNo);
2864 Assert(iBitNo2 != iBitNo1);
2865
2866#ifdef RT_ARCH_AMD64
2867 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2868#elif defined(RT_ARCH_ARM64)
2869 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2870#endif
2871
2872 /* Check for the lone bit first. */
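    /* For the not-set-and-equal form a set lone bit means the condition is false, so we
       jump to the else-block; for the inverted set-or-not-equal form it means the
       condition already holds, so we jump straight to the if-block and skip the
       two-bit comparison. */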
2873 if (!fInverted)
2874 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2875 else
2876 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2877
2878 /* Then extract and compare the other two bits. */
2879#ifdef RT_ARCH_AMD64
2880 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2881 if (iBitNo1 > iBitNo2)
2882 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2883 else
2884 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2885 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2886
2887#elif defined(RT_ARCH_ARM64)
2888 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2889
2890 /* and tmpreg, eflreg, #1<<iBitNo1 */
2891 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2892
2893    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2894 if (iBitNo1 > iBitNo2)
2895 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2896 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2897 else
2898 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2899 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2900
2901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2902
2903#else
2904# error "Port me"
2905#endif
2906
2907 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2908 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2909 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2910
2911 /* Free but don't flush the EFlags and tmp registers. */
2912 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2913 iemNativeRegFreeTmp(pReNative, idxEflReg);
2914
2915 /* Make a copy of the core state now as we start the if-block. */
2916 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2917
2918 return off;
2919}
2920
2921
2922#define IEM_MC_IF_CX_IS_NZ() \
2923 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2924 do {
2925
2926/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2927DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2928{
2929 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2930
2931 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2932 kIemNativeGstRegUse_ReadOnly);
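    /* Only CX matters here, hence the UINT16_MAX mask restricting the test to the low 16 bits of RCX. */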
2933 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2934 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2935
2936 iemNativeCondStartIfBlock(pReNative, off);
2937 return off;
2938}
2939
2940
2941#define IEM_MC_IF_ECX_IS_NZ() \
2942 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2943 do {
2944
2945#define IEM_MC_IF_RCX_IS_NZ() \
2946 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2947 do {
2948
2949/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2950DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2951{
2952 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2953
2954 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2955 kIemNativeGstRegUse_ReadOnly);
2956 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2957 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2958
2959 iemNativeCondStartIfBlock(pReNative, off);
2960 return off;
2961}
2962
2963
2964#define IEM_MC_IF_CX_IS_NOT_ONE() \
2965 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2966 do {
2967
2968/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2969DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2970{
2971 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2972
2973 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2974 kIemNativeGstRegUse_ReadOnly);
2975#ifdef RT_ARCH_AMD64
2976 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2977#else
2978 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2979 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2980 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2981#endif
2982 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2983
2984 iemNativeCondStartIfBlock(pReNative, off);
2985 return off;
2986}
2987
2988
2989#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2990 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2991 do {
2992
2993#define IEM_MC_IF_RCX_IS_NOT_ONE() \
2994 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
2995 do {
2996
2997/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
2998DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2999{
3000 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3001
3002 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3003 kIemNativeGstRegUse_ReadOnly);
3004 if (f64Bit)
3005 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3006 else
3007 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3008 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3009
3010 iemNativeCondStartIfBlock(pReNative, off);
3011 return off;
3012}
3013
3014
3015#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3016 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3017 do {
3018
3019#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3020 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3021 do {
3022
3023/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3024 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3025DECL_INLINE_THROW(uint32_t)
3026iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3027{
3028 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3029 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3030
3031 /* We have to load both RCX and EFLAGS before we can start branching,
3032 otherwise we'll end up in the else-block with an inconsistent
3033 register allocator state.
3034 Doing EFLAGS first as it's more likely to be loaded, right? */
3035 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3036 kIemNativeGstRegUse_ReadOnly);
3037 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3038 kIemNativeGstRegUse_ReadOnly);
3039
3040 /** @todo we could reduce this to a single branch instruction by spending a
3041 * temporary register and some setnz stuff. Not sure if loops are
3042 * worth it. */
3043 /* Check CX. */
3044#ifdef RT_ARCH_AMD64
3045 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3046#else
3047 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3048 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3049 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3050#endif
3051
3052 /* Check the EFlags bit. */
3053 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3054 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3055 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3056 !fCheckIfSet /*fJmpIfSet*/);
3057
3058 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3059 iemNativeRegFreeTmp(pReNative, idxEflReg);
3060
3061 iemNativeCondStartIfBlock(pReNative, off);
3062 return off;
3063}
3064
3065
3066#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3067 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3068 do {
3069
3070#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3071 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3072 do {
3073
3074#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3075 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3076 do {
3077
3078#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3079 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3080 do {
3081
3082/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3083 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3084 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3085 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3086DECL_INLINE_THROW(uint32_t)
3087iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3088 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3089{
3090 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3091 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3092
3093 /* We have to load both RCX and EFLAGS before we can start branching,
3094 otherwise we'll end up in the else-block with an inconsistent
3095 register allocator state.
3096 Doing EFLAGS first as it's more likely to be loaded, right? */
3097 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3098 kIemNativeGstRegUse_ReadOnly);
3099 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3100 kIemNativeGstRegUse_ReadOnly);
3101
3102 /** @todo we could reduce this to a single branch instruction by spending a
3103 * temporary register and some setnz stuff. Not sure if loops are
3104 * worth it. */
3105 /* Check RCX/ECX. */
3106 if (f64Bit)
3107 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3108 else
3109 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3110
3111 /* Check the EFlags bit. */
3112 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3113 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3114 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3115 !fCheckIfSet /*fJmpIfSet*/);
3116
3117 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3118 iemNativeRegFreeTmp(pReNative, idxEflReg);
3119
3120 iemNativeCondStartIfBlock(pReNative, off);
3121 return off;
3122}
3123
3124
3125#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3126 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3127 do {
3128
3129/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3130DECL_INLINE_THROW(uint32_t)
3131iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3132{
3133 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3134
3135 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3136 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3137 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3138 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3139
3140 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3141
3142 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3143
3144 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3145
3146 iemNativeCondStartIfBlock(pReNative, off);
3147 return off;
3148}
3149
3150
3151#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3152 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3153 do {
3154
3155/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3156DECL_INLINE_THROW(uint32_t)
3157iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3158{
3159 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3160 Assert(iGReg < 16);
3161
3162 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3163 kIemNativeGstRegUse_ReadOnly);
3164
3165 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3166
3167 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3168
3169 iemNativeCondStartIfBlock(pReNative, off);
3170 return off;
3171}
3172
3173
3174
3175/*********************************************************************************************************************************
3176* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3177*********************************************************************************************************************************/
3178
3179#define IEM_MC_NOREF(a_Name) \
3180 RT_NOREF_PV(a_Name)
3181
3182#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3183 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3184
3185#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3186 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3187
3188#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3189 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3190
3191#define IEM_MC_LOCAL(a_Type, a_Name) \
3192 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3193
3194#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3195 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3196
3197#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3198 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3199
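/*
 * Editorial usage sketch for the macros above (assumed typical microcode usage,
 * names are examples only):
 *
 *      IEM_MC_ARG(uint16_t, u16Src, 1);
 *      IEM_MC_LOCAL(uint16_t, u16Tmp);
 *      IEM_MC_LOCAL_CONST(uint8_t, bImmExample, 0x12);
 *
 * Each expands to a uint8_t holding a packed variable index allocated from
 * pReNative->Core.aVars; arguments additionally record their call argument slot.
 */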
3200
3201/**
3202 * Sets the host register for @a idxVar to @a idxReg.
3203 *
3204 * The register must not be allocated. Any guest register shadowing will be
3205 * implicitly dropped by this call.
3206 *
3207 * The variable must not have any register associated with it (causes
3208 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3209 * implied.
3210 *
3211 * @returns idxReg
3212 * @param pReNative The recompiler state.
3213 * @param idxVar The variable.
3214 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3215 * @param off For recording in debug info.
3216 *
3217 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3218 */
3219DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3220{
3221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3222 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3223 Assert(!pVar->fRegAcquired);
3224 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3225 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3226 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3227
3228 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3229 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3230
3231 iemNativeVarSetKindToStack(pReNative, idxVar);
3232 pVar->idxReg = idxReg;
3233
3234 return idxReg;
3235}
3236
3237
3238/**
3239 * A convenient helper function.
3240 */
3241DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3242 uint8_t idxReg, uint32_t *poff)
3243{
3244 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3245 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3246 return idxReg;
3247}
3248
3249
3250/**
3251 * This is called by IEM_MC_END() to clean up all variables.
3252 */
3253DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3254{
3255 uint32_t const bmVars = pReNative->Core.bmVars;
3256 if (bmVars != 0)
3257 iemNativeVarFreeAllSlow(pReNative, bmVars);
3258 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3259 Assert(pReNative->Core.bmStack == 0);
3260}
3261
3262
3263#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3264
3265/**
3266 * This is called by IEM_MC_FREE_LOCAL.
3267 */
3268DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3269{
3270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3271 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3272 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3273}
3274
3275
3276#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3277
3278/**
3279 * This is called by IEM_MC_FREE_ARG.
3280 */
3281DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3282{
3283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3284 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3285 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3286}
3287
3288
3289#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3290
3291/**
3292 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3293 */
3294DECL_INLINE_THROW(uint32_t)
3295iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3296{
3297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3298 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3299 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3300 Assert( pVarDst->cbVar == sizeof(uint16_t)
3301 || pVarDst->cbVar == sizeof(uint32_t));
3302
3303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3304 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3305 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3306 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3307 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3308
3309 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3310
3311 /*
3312 * Special case for immediates.
3313 */
3314 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3315 {
3316 switch (pVarDst->cbVar)
3317 {
3318 case sizeof(uint16_t):
3319 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3320 break;
3321 case sizeof(uint32_t):
3322 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3323 break;
3324 default: AssertFailed(); break;
3325 }
3326 }
3327 else
3328 {
3329 /*
3330 * The generic solution for now.
3331 */
3332 /** @todo optimize this by having the python script make sure the source
3333 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3334 * statement. Then we could just transfer the register assignments. */
3335 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3336 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3337 switch (pVarDst->cbVar)
3338 {
3339 case sizeof(uint16_t):
3340 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3341 break;
3342 case sizeof(uint32_t):
3343 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3344 break;
3345 default: AssertFailed(); break;
3346 }
3347 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3348 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3349 }
3350 return off;
3351}
3352
3353
3354
3355/*********************************************************************************************************************************
3356* Emitters for IEM_MC_CALL_CIMPL_XXX *
3357*********************************************************************************************************************************/
3358
3359/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3360DECL_INLINE_THROW(uint32_t)
3361iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3362 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3363
3364{
3365 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3366
3367#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3368 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3369       when a call clobbers any of the relevant control registers. */
3370# if 1
3371 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3372 {
3373 /* Likely as long as call+ret are done via cimpl. */
3374 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3375 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3376 }
3377 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3378 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3379 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3380 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3381 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3382 else
3383 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3384 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3385 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3386
3387# else
3388 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3389 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3390 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3391 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3392 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3393 || pfnCImpl == (uintptr_t)iemCImpl_callf
3394 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3395 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3396 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3397 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3398 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3399# endif
3400#endif
3401
3402 /*
3403 * Do all the call setup and cleanup.
3404 */
3405 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3406
3407 /*
3408 * Load the two or three hidden arguments.
3409 */
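    /* In VBOXSTRICTRC_STRICT_ENABLED builds on Windows/AMD64 the first hidden argument
       is a pointer to a stack slot receiving the VBOXSTRICTRC (rcStrict), followed by
       pVCpu and cbInstr; in all other configurations only pVCpu and cbInstr are passed. */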
3410#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3411 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3412 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3413 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3414#else
3415 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3416 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3417#endif
3418
3419 /*
3420 * Make the call and check the return code.
3421 *
3422 * Shadow PC copies are always flushed here, other stuff depends on flags.
3423     * Segment and general purpose registers are explicitly flushed via the
3424 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3425 * macros.
3426 */
3427 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3428#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3429 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3430#endif
3431 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3432 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3433 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3434 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3435
3436 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3437}
3438
3439
3440#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3441 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3442
3443/** Emits code for IEM_MC_CALL_CIMPL_1. */
3444DECL_INLINE_THROW(uint32_t)
3445iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3446 uintptr_t pfnCImpl, uint8_t idxArg0)
3447{
3448 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3449 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3450}
3451
3452
3453#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3454 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3455
3456/** Emits code for IEM_MC_CALL_CIMPL_2. */
3457DECL_INLINE_THROW(uint32_t)
3458iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3459 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3460{
3461 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3462 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3463 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3464}
3465
3466
3467#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3468 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3469 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3470
3471/** Emits code for IEM_MC_CALL_CIMPL_3. */
3472DECL_INLINE_THROW(uint32_t)
3473iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3474 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3475{
3476 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3477 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3478 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3479 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3480}
3481
3482
3483#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3484 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3485 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3486
3487/** Emits code for IEM_MC_CALL_CIMPL_4. */
3488DECL_INLINE_THROW(uint32_t)
3489iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3490 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3491{
3492 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3493 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3496 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3497}
3498
3499
3500#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3501 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3502 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3503
3504/** Emits code for IEM_MC_CALL_CIMPL_5. */
3505DECL_INLINE_THROW(uint32_t)
3506iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3507 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3508{
3509 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3510 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3511 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3513 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3514 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3515}
3516
3517
3518/** Recompiler debugging: Flush guest register shadow copies. */
3519#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3520
3521
3522
3523/*********************************************************************************************************************************
3524* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3529 */
3530DECL_INLINE_THROW(uint32_t)
3531iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3532 uintptr_t pfnAImpl, uint8_t cArgs)
3533{
3534 if (idxVarRc != UINT8_MAX)
3535 {
3536 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3537 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3538 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3539 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3540 }
3541
3542 /*
3543 * Do all the call setup and cleanup.
3544 *
3545     * Only pending guest register writes living in call volatile registers need to be
3546     * flushed here, as assembly helpers can't throw and don't access anything living in
3547     * CPUMCTX; they only access their parameters.  Call volatile registers are always
3548     * flushed by iemNativeEmitCallCommon() no matter the fFlushPendingWrites parameter.
3549 */
3550 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3551
3552 /*
3553 * Make the call and update the return code variable if we've got one.
3554 */
3555 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3556 if (idxVarRc != UINT8_MAX)
3557 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3558
3559 return off;
3560}
3561
3562
3563
3564#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3565 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3566
3567#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3568 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3569
3570/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3571DECL_INLINE_THROW(uint32_t)
3572iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3573{
3574 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3575}
3576
3577
3578#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3579 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3580
3581#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3582 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3583
3584/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3585DECL_INLINE_THROW(uint32_t)
3586iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3587{
3588 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3589 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3590}
3591
3592
3593#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3594 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3595
3596#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3597 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3598
3599/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3600DECL_INLINE_THROW(uint32_t)
3601iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3602 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3603{
3604 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3605 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3606 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3607}
3608
3609
3610#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3611 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3612
3613#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3614 IEM_MC_LOCAL(a_rcType, a_rc); \
3615 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3616
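/* Note that unlike the 0, 1 and 2 argument variants above, IEM_MC_CALL_AIMPL_3 (and
   IEM_MC_CALL_AIMPL_4 below) declares the return code local itself via IEM_MC_LOCAL
   before emitting the call. */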
3617/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3618DECL_INLINE_THROW(uint32_t)
3619iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3620 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3621{
3622 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3623 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3624 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3625 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3626}
3627
3628
3629#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3630 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3631
3632#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3633 IEM_MC_LOCAL(a_rcType, a_rc); \
3634 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3635
3636/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3637DECL_INLINE_THROW(uint32_t)
3638iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3639 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3640{
3641 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3642 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3643 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3644 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3645 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3646}
3647
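/*
 * Note: IEM_MC_CALL_AIMPL_3/4 declare the result variable themselves (hence
 * the a_rcType parameter) and pass its index as idxVarRc, the 1 and 2
 * argument forms take an already declared a_rc, and the VOID forms pass
 * UINT8_MAX.  As a purely illustrative (hypothetical names) example,
 *      IEM_MC_CALL_AIMPL_3(uint32_t, u32Ret, pfnSomeAImpl, arg0, arg1, arg2)
 * expands to
 *      IEM_MC_LOCAL(uint32_t, u32Ret);
 *      off = iemNativeEmitCallAImpl3(pReNative, off, u32Ret, (uintptr_t)(pfnSomeAImpl), arg0, arg1, arg2);
 * with the actual argument and return value handling done by
 * iemNativeEmitCallAImplCommon.
 */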
3648
3649
3650/*********************************************************************************************************************************
3651* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3652*********************************************************************************************************************************/
3653
3654#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3655 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3656
3657#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3658 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3659
3660#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3661 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3662
3663#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3664 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3665
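/*
 * Note: For the byte register variants the threaded recompiler passes an
 * extended register index (iGRegEx): values 0..15 address the regular low
 * byte registers, while 16..19 address AH, CH, DH and BH, i.e. bits 15:8 of
 * GPRs 0..3.  That is why the emitters below mask with 15 to get the
 * containing GPR and select the Gpr8 vs Gpr8Hi load based on iGRegEx < 16.
 */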
3666
3667/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3668 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3669DECL_INLINE_THROW(uint32_t)
3670iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3671{
3672 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3673 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3674 Assert(iGRegEx < 20);
3675
3676 /* Same discussion as in iemNativeEmitFetchGregU16 */
3677 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3678 kIemNativeGstRegUse_ReadOnly);
3679
3680 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3681 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3682
3683 /* The value is zero-extended to the full 64-bit host register width. */
3684 if (iGRegEx < 16)
3685 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3686 else
3687 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3688
3689 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3690 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3691 return off;
3692}
3693
3694
3695#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3696 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3697
3698#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3699 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3700
3701#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3702 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3703
3704/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3705DECL_INLINE_THROW(uint32_t)
3706iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3707{
3708 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3710 Assert(iGRegEx < 20);
3711
3712 /* Same discussion as in iemNativeEmitFetchGregU16 */
3713 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3714 kIemNativeGstRegUse_ReadOnly);
3715
3716 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3717 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3718
3719 if (iGRegEx < 16)
3720 {
3721 switch (cbSignExtended)
3722 {
3723 case sizeof(uint16_t):
3724 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3725 break;
3726 case sizeof(uint32_t):
3727 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3728 break;
3729 case sizeof(uint64_t):
3730 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3731 break;
3732 default: AssertFailed(); break;
3733 }
3734 }
3735 else
3736 {
3737 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3738 switch (cbSignExtended)
3739 {
3740 case sizeof(uint16_t):
3741 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3742 break;
3743 case sizeof(uint32_t):
3744 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3745 break;
3746 case sizeof(uint64_t):
3747 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3748 break;
3749 default: AssertFailed(); break;
3750 }
3751 }
3752
3753 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3754 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3755 return off;
3756}
3757
3758
3759
3760#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3761 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3762
3763#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3764 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3765
3766#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3767 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3768
3769/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3770DECL_INLINE_THROW(uint32_t)
3771iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3772{
3773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3775 Assert(iGReg < 16);
3776
3777 /*
3778 * We can either just load the low 16-bit of the GPR into a host register
3779 * for the variable, or we can do so via a shadow copy host register. The
3780 * latter will avoid having to reload it if it's being stored later, but
3781 * will waste a host register if it isn't touched again. Since we don't
3782     * know what's going to happen, we choose the latter for now.
3783 */
3784 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3785 kIemNativeGstRegUse_ReadOnly);
3786
3787 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3788 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
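    /* The 16-bit load below zero-extends into the full 64-bit host register
       (presumably a movzx on AMD64 and an uxth on ARM64), which is why the
       ZX_U32/ZX_U64 variants above need no extra instructions and
       cbZeroExtended is only used for the size assertion. */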
3789 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3790 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3791
3792 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3793 return off;
3794}
3795
3796
3797#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3798 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3799
3800#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3801 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3802
3803/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3804DECL_INLINE_THROW(uint32_t)
3805iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3806{
3807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3808 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3809 Assert(iGReg < 16);
3810
3811 /*
3812 * We can either just load the low 16-bit of the GPR into a host register
3813 * for the variable, or we can do so via a shadow copy host register. The
3814 * latter will avoid having to reload it if it's being stored later, but
3815 * will waste a host register if it isn't touched again. Since we don't
3816     * know what's going to happen, we choose the latter for now.
3817 */
3818 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3819 kIemNativeGstRegUse_ReadOnly);
3820
3821 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3822 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3823 if (cbSignExtended == sizeof(uint32_t))
3824 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3825 else
3826 {
3827 Assert(cbSignExtended == sizeof(uint64_t));
3828 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3829 }
3830 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3831
3832 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3833 return off;
3834}
3835
3836
3837#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3838 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3839
3840#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3841 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3842
3843/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3844DECL_INLINE_THROW(uint32_t)
3845iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3846{
3847 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3848 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3849 Assert(iGReg < 16);
3850
3851 /*
3852     * We can either just load the low 32-bit of the GPR into a host register
3853     * for the variable, or we can do so via a shadow copy host register. The
3854     * latter will avoid having to reload it if it's being stored later, but
3855     * will waste a host register if it isn't touched again. Since we don't
3856     * know what's going to happen, we choose the latter for now.
3857 */
3858 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3859 kIemNativeGstRegUse_ReadOnly);
3860
3861 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3862 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3863 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3864 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3865
3866 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3867 return off;
3868}
3869
3870
3871#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3872 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3873
3874/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3875DECL_INLINE_THROW(uint32_t)
3876iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3877{
3878 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3879 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3880 Assert(iGReg < 16);
3881
3882 /*
3883 * We can either just load the low 32-bit of the GPR into a host register
3884 * for the variable, or we can do so via a shadow copy host register. The
3885 * latter will avoid having to reload it if it's being stored later, but
3886 * will waste a host register if it isn't touched again. Since we don't
3887     * know what's going to happen, we choose the latter for now.
3888 */
3889 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3890 kIemNativeGstRegUse_ReadOnly);
3891
3892 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3893 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3894 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3895 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3896
3897 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3898 return off;
3899}
3900
3901
3902#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3903 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3904
3905#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3906 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3907
3908/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3909 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3910DECL_INLINE_THROW(uint32_t)
3911iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3912{
3913 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3914 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3915 Assert(iGReg < 16);
3916
3917 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3918 kIemNativeGstRegUse_ReadOnly);
3919
3920 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3921 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3922 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3923 /** @todo name the register a shadow one already? */
3924 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3925
3926 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3927 return off;
3928}
3929
3930
3931#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3932#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3933 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3934
3935/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3936DECL_INLINE_THROW(uint32_t)
3937iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3938{
3939 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3940 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3941 Assert(iGRegLo < 16 && iGRegHi < 16);
3942
3943 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3944 kIemNativeGstRegUse_ReadOnly);
3945 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3946 kIemNativeGstRegUse_ReadOnly);
3947
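    /* The 128-bit destination variable is backed by a host SIMD register;
       the two guest GPRs are stored into its low (lane 0) and high (lane 1)
       64-bit halves. */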
3948 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3949 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
3950 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3951 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3952
3953 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3954 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3955 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3956 return off;
3957}
3958#endif
3959
3960
3961/*********************************************************************************************************************************
3962* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3963*********************************************************************************************************************************/
3964
3965#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3966 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3967
3968/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3969DECL_INLINE_THROW(uint32_t)
3970iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3971{
3972 Assert(iGRegEx < 20);
3973 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3974 kIemNativeGstRegUse_ForUpdate);
3975#ifdef RT_ARCH_AMD64
3976 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3977
3978 /* To the lowest byte of the register: mov r8, imm8 */
3979 if (iGRegEx < 16)
3980 {
3981 if (idxGstTmpReg >= 8)
3982 pbCodeBuf[off++] = X86_OP_REX_B;
3983 else if (idxGstTmpReg >= 4)
3984 pbCodeBuf[off++] = X86_OP_REX;
3985 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3986 pbCodeBuf[off++] = u8Value;
3987 }
3988    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
3989 else if (idxGstTmpReg < 4)
3990 {
3991 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
3992 pbCodeBuf[off++] = u8Value;
3993 }
3994 else
3995 {
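        /* The destination is ah, ch, dh or bh but the host register holding
           the guest GPR has no addressable high-byte form here, so rotate the
           register right by 8 to bring bits 15:8 down to 7:0, patch in the
           immediate, and rotate back. */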
3996 /* ror reg64, 8 */
3997 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3998 pbCodeBuf[off++] = 0xc1;
3999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4000 pbCodeBuf[off++] = 8;
4001
4002 /* mov reg8, imm8 */
4003 if (idxGstTmpReg >= 8)
4004 pbCodeBuf[off++] = X86_OP_REX_B;
4005 else if (idxGstTmpReg >= 4)
4006 pbCodeBuf[off++] = X86_OP_REX;
4007 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4008 pbCodeBuf[off++] = u8Value;
4009
4010 /* rol reg64, 8 */
4011 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4012 pbCodeBuf[off++] = 0xc1;
4013 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4014 pbCodeBuf[off++] = 8;
4015 }
4016
4017#elif defined(RT_ARCH_ARM64)
4018 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4019 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4020 if (iGRegEx < 16)
4021 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4022 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4023 else
4024 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4025 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4026 iemNativeRegFreeTmp(pReNative, idxImmReg);
4027
4028#else
4029# error "Port me!"
4030#endif
4031
4032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4033
4034#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4035 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4036#endif
4037
4038 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4039 return off;
4040}
4041
4042
4043#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4044 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4045
4046/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4047DECL_INLINE_THROW(uint32_t)
4048iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4049{
4050 Assert(iGRegEx < 20);
4051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4052
4053 /*
4054     * If it's a constant value (unlikely) we treat this as an
4055 * IEM_MC_STORE_GREG_U8_CONST statement.
4056 */
4057 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4058 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4059 { /* likely */ }
4060 else
4061 {
4062 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4063 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4064 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4065 }
4066
4067 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4068 kIemNativeGstRegUse_ForUpdate);
4069 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4070
4071#ifdef RT_ARCH_AMD64
4072 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4073 if (iGRegEx < 16)
4074 {
4075 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4076 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4077 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4078 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4079 pbCodeBuf[off++] = X86_OP_REX;
4080 pbCodeBuf[off++] = 0x8a;
4081 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4082 }
4083    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4084 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4085 {
4086 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4087 pbCodeBuf[off++] = 0x8a;
4088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4089 }
4090 else
4091 {
4092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4093
4094 /* ror reg64, 8 */
4095 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4096 pbCodeBuf[off++] = 0xc1;
4097 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4098 pbCodeBuf[off++] = 8;
4099
4100 /* mov reg8, reg8(r/m) */
4101 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4102 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4103 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4104 pbCodeBuf[off++] = X86_OP_REX;
4105 pbCodeBuf[off++] = 0x8a;
4106 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4107
4108 /* rol reg64, 8 */
4109 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4110 pbCodeBuf[off++] = 0xc1;
4111 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4112 pbCodeBuf[off++] = 8;
4113 }
4114
4115#elif defined(RT_ARCH_ARM64)
4116 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4117 or
4118 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4119 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4120 if (iGRegEx < 16)
4121 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4122 else
4123 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4124
4125#else
4126# error "Port me!"
4127#endif
4128 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4129
4130 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4131
4132#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4133 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4134#endif
4135 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4136 return off;
4137}
4138
4139
4140
4141#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4142 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4143
4144/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4145DECL_INLINE_THROW(uint32_t)
4146iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4147{
4148 Assert(iGReg < 16);
4149 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4150 kIemNativeGstRegUse_ForUpdate);
4151#ifdef RT_ARCH_AMD64
4152 /* mov reg16, imm16 */
4153 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4154 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4155 if (idxGstTmpReg >= 8)
4156 pbCodeBuf[off++] = X86_OP_REX_B;
4157 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4158 pbCodeBuf[off++] = RT_BYTE1(uValue);
4159 pbCodeBuf[off++] = RT_BYTE2(uValue);
4160
4161#elif defined(RT_ARCH_ARM64)
4162 /* movk xdst, #uValue, lsl #0 */
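    /* movk only replaces bits 15:0 and leaves the rest of the register
       untouched, which matches the x86 semantics of a 16-bit GPR write. */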
4163 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4164 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4165
4166#else
4167# error "Port me!"
4168#endif
4169
4170 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4171
4172#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4173 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4174#endif
4175 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4176 return off;
4177}
4178
4179
4180#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4181 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4182
4183/** Emits code for IEM_MC_STORE_GREG_U16. */
4184DECL_INLINE_THROW(uint32_t)
4185iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4186{
4187 Assert(iGReg < 16);
4188 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4189
4190 /*
4191     * If it's a constant value (unlikely) we treat this as an
4192 * IEM_MC_STORE_GREG_U16_CONST statement.
4193 */
4194 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4195 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4196 { /* likely */ }
4197 else
4198 {
4199 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4200 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4201 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4202 }
4203
4204 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4205 kIemNativeGstRegUse_ForUpdate);
4206
4207#ifdef RT_ARCH_AMD64
4208 /* mov reg16, reg16 or [mem16] */
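    /* Depending on where the variable currently lives, either move from its
       host register or load it straight from its stack slot (rbp relative),
       so there is no need to force it into a register first. */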
4209 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4210 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4211 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4212 {
4213 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4214 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4215 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4216 pbCodeBuf[off++] = 0x8b;
4217 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4218 }
4219 else
4220 {
4221 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4222 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4223 if (idxGstTmpReg >= 8)
4224 pbCodeBuf[off++] = X86_OP_REX_R;
4225 pbCodeBuf[off++] = 0x8b;
4226 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4227 }
4228
4229#elif defined(RT_ARCH_ARM64)
4230 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4231 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4232 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4233 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4234 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4235
4236#else
4237# error "Port me!"
4238#endif
4239
4240 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4241
4242#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4243 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4244#endif
4245 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4246 return off;
4247}
4248
4249
4250#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4251 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4252
4253/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4254DECL_INLINE_THROW(uint32_t)
4255iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4256{
4257 Assert(iGReg < 16);
4258 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4259 kIemNativeGstRegUse_ForFullWrite);
4260 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4261#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4262 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4263#endif
4264 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4265 return off;
4266}
4267
4268
4269#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4270 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4271
4272/** Emits code for IEM_MC_STORE_GREG_U32. */
4273DECL_INLINE_THROW(uint32_t)
4274iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4275{
4276 Assert(iGReg < 16);
4277 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4278
4279 /*
4280     * If it's a constant value (unlikely) we treat this as an
4281 * IEM_MC_STORE_GREG_U32_CONST statement.
4282 */
4283 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4284 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4285 { /* likely */ }
4286 else
4287 {
4288 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4289 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4290 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4291 }
4292
4293 /*
4294     * For the rest we allocate a guest register for the variable and write
4295 * it to the CPUMCTX structure.
4296 */
4297 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4298#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4299 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4300#else
4301 RT_NOREF(idxVarReg);
4302#endif
4303#ifdef VBOX_STRICT
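    /* A 32-bit GPR write must leave the upper half zero (x86-64 zero
       extension semantics), so in strict builds verify that the acquired
       host register really has bits 63:32 clear. */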
4304 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4305#endif
4306 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4307 return off;
4308}
4309
4310
4311#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4312 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4313
4314/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4315DECL_INLINE_THROW(uint32_t)
4316iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4317{
4318 Assert(iGReg < 16);
4319 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4320 kIemNativeGstRegUse_ForFullWrite);
4321 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4322#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4323 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4324#endif
4325 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4326 return off;
4327}
4328
4329
4330#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4331 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4332
4333#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4334 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4335
4336/** Emits code for IEM_MC_STORE_GREG_U64. */
4337DECL_INLINE_THROW(uint32_t)
4338iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4339{
4340 Assert(iGReg < 16);
4341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4342
4343 /*
4344     * If it's a constant value (unlikely) we treat this as an
4345 * IEM_MC_STORE_GREG_U64_CONST statement.
4346 */
4347 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4348 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4349 { /* likely */ }
4350 else
4351 {
4352 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4353 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4354 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4355 }
4356
4357 /*
4358     * For the rest we allocate a guest register for the variable and write
4359 * it to the CPUMCTX structure.
4360 */
4361 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4362#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4363 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4364#else
4365 RT_NOREF(idxVarReg);
4366#endif
4367 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4368 return off;
4369}
4370
4371
4372#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4373 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4374
4375/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4376DECL_INLINE_THROW(uint32_t)
4377iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4378{
4379 Assert(iGReg < 16);
4380 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4381 kIemNativeGstRegUse_ForUpdate);
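    /* Copying the register onto itself via the 32-bit view clears bits 63:32
       on both hosts (a 32-bit mov zero-extends on AMD64; the ARM64
       counterpart likewise leaves the upper half zero). */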
4382 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4383#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4384 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4385#endif
4386 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4387 return off;
4388}
4389
4390
4391#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4392#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4393 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4394
4395/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4396DECL_INLINE_THROW(uint32_t)
4397iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4398{
4399 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4400 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4401 Assert(iGRegLo < 16 && iGRegHi < 16);
4402
4403 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4404 kIemNativeGstRegUse_ForFullWrite);
4405 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4406 kIemNativeGstRegUse_ForFullWrite);
4407
4408 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4409 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4410 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4411 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4412
4413 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4414 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4415 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4416 return off;
4417}
4418#endif
4419
4420
4421/*********************************************************************************************************************************
4422* General purpose register manipulation (add, sub). *
4423*********************************************************************************************************************************/
4424
4425#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
4426    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
4427
4428/** Emits code for IEM_MC_ADD_GREG_U16. */
4429DECL_INLINE_THROW(uint32_t)
4430iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4431{
4432 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4433 kIemNativeGstRegUse_ForUpdate);
4434
4435#ifdef RT_ARCH_AMD64
4436 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4437 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4438 if (idxGstTmpReg >= 8)
4439 pbCodeBuf[off++] = X86_OP_REX_B;
4440 if (uAddend == 1)
4441 {
4442 pbCodeBuf[off++] = 0xff; /* inc */
4443 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4444 }
4445 else
4446 {
4447 pbCodeBuf[off++] = 0x81;
4448 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4449 pbCodeBuf[off++] = uAddend;
4450 pbCodeBuf[off++] = 0;
4451 }
4452
4453#else
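    /* ARM64: Do the 16-bit addition in a temporary register and merge only
       bits 15:0 back with BFI, so that bits 63:16 of the guest register are
       preserved just like with x86 16-bit arithmetic. */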
4454 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4455 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4456
4457    /* add tmp, gstgrp, uAddend */
4458 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4459
4460    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
4461 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4462
4463 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4464#endif
4465
4466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4467
4468#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4469 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4470#endif
4471
4472 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4473 return off;
4474}
4475
4476
4477#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4478 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4479
4480#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4481 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4482
4483/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4484DECL_INLINE_THROW(uint32_t)
4485iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4486{
4487 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4488 kIemNativeGstRegUse_ForUpdate);
4489
4490#ifdef RT_ARCH_AMD64
4491 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4492 if (f64Bit)
4493 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4494 else if (idxGstTmpReg >= 8)
4495 pbCodeBuf[off++] = X86_OP_REX_B;
4496 if (uAddend == 1)
4497 {
4498 pbCodeBuf[off++] = 0xff; /* inc */
4499 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4500 }
4501 else if (uAddend < 128)
4502 {
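        /* The 0x83 form takes a sign-extended imm8, so it only covers addends
           0..127; larger values use the imm32 form below. */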
4503 pbCodeBuf[off++] = 0x83; /* add */
4504 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4505 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4506 }
4507 else
4508 {
4509 pbCodeBuf[off++] = 0x81; /* add */
4510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4511 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4512 pbCodeBuf[off++] = 0;
4513 pbCodeBuf[off++] = 0;
4514 pbCodeBuf[off++] = 0;
4515 }
4516
4517#else
4518    /* add gstgrp, gstgrp, uAddend */
4519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4520 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4521
4522#endif
4523
4524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4525
4526#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4527 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4528#endif
4529
4530 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4531 return off;
4532}
4533
4534
4535
4536#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4537 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4538
4539/** Emits code for IEM_MC_SUB_GREG_U16. */
4540DECL_INLINE_THROW(uint32_t)
4541iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4542{
4543 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4544 kIemNativeGstRegUse_ForUpdate);
4545
4546#ifdef RT_ARCH_AMD64
4547 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4548 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4549 if (idxGstTmpReg >= 8)
4550 pbCodeBuf[off++] = X86_OP_REX_B;
4551 if (uSubtrahend == 1)
4552 {
4553 pbCodeBuf[off++] = 0xff; /* dec */
4554 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4555 }
4556 else
4557 {
4558 pbCodeBuf[off++] = 0x81;
4559 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4560 pbCodeBuf[off++] = uSubtrahend;
4561 pbCodeBuf[off++] = 0;
4562 }
4563
4564#else
4565 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4566 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4567
4568 /* sub tmp, gstgrp, uSubtrahend */
4569 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4570
4571    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
4572 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4573
4574 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4575#endif
4576
4577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4578
4579#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4580 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4581#endif
4582
4583 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4584 return off;
4585}
4586
4587
4588#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4589 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4590
4591#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4592 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4593
4594/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4595DECL_INLINE_THROW(uint32_t)
4596iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4597{
4598 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4599 kIemNativeGstRegUse_ForUpdate);
4600
4601#ifdef RT_ARCH_AMD64
4602 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4603 if (f64Bit)
4604 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4605 else if (idxGstTmpReg >= 8)
4606 pbCodeBuf[off++] = X86_OP_REX_B;
4607 if (uSubtrahend == 1)
4608 {
4609 pbCodeBuf[off++] = 0xff; /* dec */
4610 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4611 }
4612 else if (uSubtrahend < 128)
4613 {
4614 pbCodeBuf[off++] = 0x83; /* sub */
4615 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4616 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4617 }
4618 else
4619 {
4620 pbCodeBuf[off++] = 0x81; /* sub */
4621 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4622 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4623 pbCodeBuf[off++] = 0;
4624 pbCodeBuf[off++] = 0;
4625 pbCodeBuf[off++] = 0;
4626 }
4627
4628#else
4629    /* sub gstgrp, gstgrp, uSubtrahend */
4630 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4631 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4632
4633#endif
4634
4635 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4636
4637#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4638 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4639#endif
4640
4641 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4642 return off;
4643}
4644
4645
4646#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4647 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4648
4649#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4650 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4651
4652#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4653 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4654
4655#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4656 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4657
4658/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4659DECL_INLINE_THROW(uint32_t)
4660iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4661{
4662#ifdef VBOX_STRICT
4663 switch (cbMask)
4664 {
4665 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4666 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4667 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4668 case sizeof(uint64_t): break;
4669 default: AssertFailedBreak();
4670 }
4671#endif
4672
4673 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4674 kIemNativeGstRegUse_ForUpdate);
4675
4676 switch (cbMask)
4677 {
4678 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4679 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4680 break;
4681 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4682 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4683 break;
4684 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4685 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4686 break;
4687 case sizeof(uint64_t):
4688 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4689 break;
4690 default: AssertFailedBreak();
4691 }
4692
4693 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4694
4695#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4696 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4697#endif
4698
4699 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4700 return off;
4701}
4702
4703
4704#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4705 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4706
4707#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4708 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4709
4710#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4711 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4712
4713#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4714 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4715
4716/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4717DECL_INLINE_THROW(uint32_t)
4718iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4719{
4720#ifdef VBOX_STRICT
4721 switch (cbMask)
4722 {
4723 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4724 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4725 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4726 case sizeof(uint64_t): break;
4727 default: AssertFailedBreak();
4728 }
4729#endif
4730
4731 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4732 kIemNativeGstRegUse_ForUpdate);
4733
4734 switch (cbMask)
4735 {
4736 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4737 case sizeof(uint16_t):
4738 case sizeof(uint64_t):
4739 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4740 break;
4741 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4742 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4743 break;
4744 default: AssertFailedBreak();
4745 }
4746
4747 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4748
4749#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4750 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4751#endif
4752
4753 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4754 return off;
4755}
4756
4757
4758/*********************************************************************************************************************************
4759* Local/Argument variable manipulation (add, sub, and, or). *
4760*********************************************************************************************************************************/
4761
4762#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4763 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4764
4765#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4766 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4767
4768#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4769 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4770
4771#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4772 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4773
4774
4775#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4776 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4777
4778#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4779 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4780
4781#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4782 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4783
4784/** Emits code for AND'ing a local and a constant value. */
4785DECL_INLINE_THROW(uint32_t)
4786iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4787{
4788#ifdef VBOX_STRICT
4789 switch (cbMask)
4790 {
4791 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4792 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4793 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4794 case sizeof(uint64_t): break;
4795 default: AssertFailedBreak();
4796 }
4797#endif
4798
4799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4800 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4801
4802 if (cbMask <= sizeof(uint32_t))
4803 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4804 else
4805 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4806
4807 iemNativeVarRegisterRelease(pReNative, idxVar);
4808 return off;
4809}
4810
4811
4812#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4813 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4814
4815#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4816 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4817
4818#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4819 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4820
4821#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4822 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4823
4824/** Emits code for OR'ing a local and a constant value. */
4825DECL_INLINE_THROW(uint32_t)
4826iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4827{
4828#ifdef VBOX_STRICT
4829 switch (cbMask)
4830 {
4831 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4832 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4833 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4834 case sizeof(uint64_t): break;
4835 default: AssertFailedBreak();
4836 }
4837#endif
4838
4839 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4840 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4841
4842 if (cbMask <= sizeof(uint32_t))
4843 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4844 else
4845 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4846
4847 iemNativeVarRegisterRelease(pReNative, idxVar);
4848 return off;
4849}
4850
4851
4852#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4853 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4854
4855#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4856 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4857
4858#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4859 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4860
4861/** Emits code for reversing the byte order in a local value. */
4862DECL_INLINE_THROW(uint32_t)
4863iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4864{
4865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4866 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4867
4868 switch (cbLocal)
4869 {
4870 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4871 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4872 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4873 default: AssertFailedBreak();
4874 }
4875
4876 iemNativeVarRegisterRelease(pReNative, idxVar);
4877 return off;
4878}
4879
4880
4881#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4882 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4883
4884#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4885 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4886
4887#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4888 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4889
4890/** Emits code for shifting left a local value. */
4891DECL_INLINE_THROW(uint32_t)
4892iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4893{
4894#ifdef VBOX_STRICT
4895 switch (cbLocal)
4896 {
4897 case sizeof(uint8_t): Assert(cShift < 8); break;
4898 case sizeof(uint16_t): Assert(cShift < 16); break;
4899 case sizeof(uint32_t): Assert(cShift < 32); break;
4900 case sizeof(uint64_t): Assert(cShift < 64); break;
4901 default: AssertFailedBreak();
4902 }
4903#endif
4904
4905 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4906 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4907
4908 if (cbLocal <= sizeof(uint32_t))
4909 {
4910 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
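        /* An 8 or 16-bit local only occupies the low bits of the host
           register, so mask off whatever got shifted beyond its declared
           width. */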
4911 if (cbLocal < sizeof(uint32_t))
4912 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4913 cbLocal == sizeof(uint16_t)
4914 ? UINT32_C(0xffff)
4915 : UINT32_C(0xff));
4916 }
4917 else
4918 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4919
4920 iemNativeVarRegisterRelease(pReNative, idxVar);
4921 return off;
4922}
4923
4924
4925#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4926 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4927
4928#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4929 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4930
4931#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4932 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4933
4934/** Emits code for arithmetically shifting right a local value. */
4935DECL_INLINE_THROW(uint32_t)
4936iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4937{
4938#ifdef VBOX_STRICT
4939 switch (cbLocal)
4940 {
4941 case sizeof(int8_t): Assert(cShift < 8); break;
4942 case sizeof(int16_t): Assert(cShift < 16); break;
4943 case sizeof(int32_t): Assert(cShift < 32); break;
4944 case sizeof(int64_t): Assert(cShift < 64); break;
4945 default: AssertFailedBreak();
4946 }
4947#endif
4948
4949 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4950 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4951
4952 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4953 if (cbLocal == sizeof(uint8_t))
4954 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4955 else if (cbLocal == sizeof(uint16_t))
4956 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4957
4958 if (cbLocal <= sizeof(uint32_t))
4959 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4960 else
4961 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4962
4963 iemNativeVarRegisterRelease(pReNative, idxVar);
4964 return off;
4965}
4966
4967
4968#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4969 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4970
4971#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4972 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4973
4974#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4975 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4976
4977/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4978DECL_INLINE_THROW(uint32_t)
4979iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4980{
4981 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4982 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4983 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4984 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4985
4986 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4987 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4988
4989 /* Need to sign extend the value. */
4990 if (cbLocal <= sizeof(uint32_t))
4991 {
4992/** @todo ARM64: In case of boredom, the extended add instruction can do the
4993 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
4994 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4995
4996 switch (cbLocal)
4997 {
4998 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
4999 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5000 default: AssertFailed();
5001 }
5002
5003 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5004 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5005 }
5006 else
5007 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5008
5009 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5010 iemNativeVarRegisterRelease(pReNative, idxVar);
5011 return off;
5012}
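
/* Illustrative note (editor's sketch, not part of the recompiler): without the
 * sign extension above a negative 16/32-bit local would be added as a large
 * unsigned value. A standalone restatement in plain C; names are invented for
 * the example.
 */
#if 0
# include <stdint.h>
# include <assert.h>
static void iemExampleAddSignedLocalToEffAddr(void)
{
    uint64_t const GCPtrEff = UINT64_C(0x1000);
    uint16_t const u16Local = (uint16_t)-2;                                       /* 0xfffe */
    assert(GCPtrEff + (uint64_t)(int64_t)(int16_t)u16Local == UINT64_C(0xffe));   /* sign extended: 0x1000 - 2 */
    assert(GCPtrEff + u16Local                             == UINT64_C(0x10ffe)); /* zero extended: wrong result */
}
#endif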
5013
5014
5015
5016/*********************************************************************************************************************************
5017* EFLAGS *
5018*********************************************************************************************************************************/
5019
5020#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5021# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5022#else
5023# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5024 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5025
5026DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5027{
5028 if (fEflOutput)
5029 {
5030 PVMCPUCC const pVCpu = pReNative->pVCpu;
5031# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5032 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5033 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5034 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5035# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5036 if (fEflOutput & (a_fEfl)) \
5037 { \
5038 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5039 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5040 else \
5041 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5042 } else do { } while (0)
5043# else
5044 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5045 IEMLIVENESSBIT const LivenessClobbered =
5046 {
5047 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5048 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5049 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5050 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5051 };
5052 IEMLIVENESSBIT const LivenessDelayable =
5053 {
5054 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5055 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5056 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5057 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5058 };
5059# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5060 if (fEflOutput & (a_fEfl)) \
5061 { \
5062 if (LivenessClobbered.a_fLivenessMember) \
5063 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5064 else if (LivenessDelayable.a_fLivenessMember) \
5065 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5066 else \
5067 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5068 } else do { } while (0)
5069# endif
5070 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5071 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5072 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5073 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5074 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5075 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5076 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5077# undef CHECK_FLAG_AND_UPDATE_STATS
5078 }
5079 RT_NOREF(fEflInput);
5080}
5081#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
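
/* Illustrative note (editor's sketch, not part of the recompiler): the extended
 * liveness layout above classifies a flag as skippable when a later instruction
 * only writes it, and as merely delayable when a potential exception or call
 * could still observe it. A plain C restatement of the two mask expressions;
 * the structure and function names are invented for the example.
 */
#if 0
# include <stdint.h>
typedef struct EXAMPLELIVENESSBITS
{
    uint64_t fRead, fWrite, fPotXcptOrCall, fOther;
} EXAMPLELIVENESSBITS;

/* Written but never read again and no exception/call can observe it -> skippable. */
static uint64_t iemExampleLivenessClobbered(EXAMPLELIVENESSBITS const *pBits)
{
    return pBits->fWrite & ~(pBits->fRead | pBits->fPotXcptOrCall | pBits->fOther);
}

/* Written and only a potential exception/call may observe it -> calculation can be delayed. */
static uint64_t iemExampleLivenessDelayable(EXAMPLELIVENESSBITS const *pBits)
{
    return pBits->fWrite & pBits->fPotXcptOrCall & ~(pBits->fRead | pBits->fOther);
}
#endif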
5082
5083#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5084#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5085 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5086
5087/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5088DECL_INLINE_THROW(uint32_t)
5089iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5090 uint32_t fEflInput, uint32_t fEflOutput)
5091{
5092 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5093 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5094 RT_NOREF(fEflInput, fEflOutput);
5095
5096#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5097# ifdef VBOX_STRICT
5098 if ( pReNative->idxCurCall != 0
5099 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5100 {
5101 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5102 uint32_t const fBoth = fEflInput | fEflOutput;
5103# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5104 AssertMsg( !(fBoth & (a_fElfConst)) \
5105 || (!(fEflInput & (a_fElfConst)) \
5106 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5107 : !(fEflOutput & (a_fElfConst)) \
5108 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5109 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5110 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5111 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5112 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5113 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5114 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5115 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5116 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5117 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5118# undef ASSERT_ONE_EFL
5119 }
5120# endif
5121#endif
5122
5123 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5124
5125 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5126 * the existing shadow copy. */
5127 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5128 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5129 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5130 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5131 return off;
5132}
5133
5134
5135
5136/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5137 * start using it with custom native code emission (inlining assembly
5138 * instruction helpers). */
5139#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5140#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5141 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5142 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5143
5144#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5145#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5146 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5147 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5148
5149/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5150DECL_INLINE_THROW(uint32_t)
5151iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5152 bool fUpdateSkipping)
5153{
5154 RT_NOREF(fEflOutput);
5155 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5156 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5157
5158#ifdef VBOX_STRICT
5159 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5160 uint32_t offFixup = off;
5161 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5162 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5163 iemNativeFixupFixedJump(pReNative, offFixup, off);
5164
5165 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5166 offFixup = off;
5167 off = iemNativeEmitJzToFixed(pReNative, off, off);
5168 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5169 iemNativeFixupFixedJump(pReNative, offFixup, off);
5170
5171 /** @todo validate that only bits in the fEflOutput mask changed. */
5172#endif
5173
5174#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5175 if (fUpdateSkipping)
5176 {
5177 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5178 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5179 else
5180 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5181 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5182 }
5183#else
5184 RT_NOREF_PV(fUpdateSkipping);
5185#endif
5186
5187 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5188 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5189 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5190 return off;
5191}
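
/* Illustrative note (editor's sketch, not part of the recompiler): the strict
 * checks above enforce the fixed EFLAGS bits, i.e. bit 1 always reads as one
 * and the reserved bits read as zero. A plain C restatement; the function name
 * and the exact reserved-bit mask are assumptions made for the example.
 */
#if 0
# include <stdint.h>
# include <stdbool.h>
static bool iemExampleIsPlausibleEFlags(uint32_t fEfl)
{
    uint32_t const fRa1 = UINT32_C(0x00000002);  /* bit 1: reserved, always one (X86_EFL_RA1_MASK). */
    uint32_t const fRaz = UINT32_C(0xffc08028);  /* bits 3, 5, 15, 22..31: reserved, read as zero. */
    return (fEfl & fRa1) == fRa1
        && (fEfl & fRaz) == 0;
}
#endif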
5192
5193
5194typedef enum IEMNATIVEMITEFLOP
5195{
5196 kIemNativeEmitEflOp_Invalid = 0,
5197 kIemNativeEmitEflOp_Set,
5198 kIemNativeEmitEflOp_Clear,
5199 kIemNativeEmitEflOp_Flip
5200} IEMNATIVEMITEFLOP;
5201
5202#define IEM_MC_SET_EFL_BIT(a_fBit) \
5203 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5204
5205#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5206 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5207
5208#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5209 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5210
5211/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5212DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5213{
5214 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5215 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5216
5217 switch (enmOp)
5218 {
5219 case kIemNativeEmitEflOp_Set:
5220 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5221 break;
5222 case kIemNativeEmitEflOp_Clear:
5223 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5224 break;
5225 case kIemNativeEmitEflOp_Flip:
5226 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5227 break;
5228 default:
5229 AssertFailed();
5230 break;
5231 }
5232
5233 /** @todo No delayed writeback for EFLAGS right now. */
5234 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5235
5236 /* Free but don't flush the EFLAGS register. */
5237 iemNativeRegFreeTmp(pReNative, idxEflReg);
5238
5239 return off;
5240}
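
/* Illustrative note (editor's sketch, not part of the recompiler): the three
 * cases above are the usual OR / AND-NOT / XOR bit tricks, e.g. for
 * X86_EFL_CF (bit 0):
 */
#if 0
# include <stdint.h>
# include <assert.h>
static void iemExampleModifyEflBit(void)
{
    uint32_t const fEfl = UINT32_C(0x00000202);                 /* IF set, CF clear. */
    assert((fEfl |  UINT32_C(1)) == UINT32_C(0x00000203));      /* set   (OR)      */
    assert((fEfl & ~UINT32_C(1)) == UINT32_C(0x00000202));      /* clear (AND NOT) */
    assert((fEfl ^  UINT32_C(1)) == UINT32_C(0x00000203));      /* flip  (XOR)     */
}
#endif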
5241
5242
5243/*********************************************************************************************************************************
5244* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5245*********************************************************************************************************************************/
5246
5247#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5248 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5249
5250#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5251 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5252
5253#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5254 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5255
5256
5257/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5258 * IEM_MC_FETCH_SREG_ZX_U64. */
5259DECL_INLINE_THROW(uint32_t)
5260iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5261{
5262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5263 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5264 Assert(iSReg < X86_SREG_COUNT);
5265
5266 /*
5267 * For now, we will not create a shadow copy of a selector. The rationale
5268 * is that since we do not recompile the popping and loading of segment
5269 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
5270 * pushing and moving to registers, there is only a small chance that the
5271 * shadow copy will be accessed again before the register is reloaded. One
5272 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5273 * the extra register pressure atm.
5274 *
5275 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5276 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
5277 * store scenario covered at present (r160730).
5278 */
5279 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5280 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5281 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5282 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5283 return off;
5284}
5285
5286
5287
5288/*********************************************************************************************************************************
5289* Register references. *
5290*********************************************************************************************************************************/
5291
5292#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5293 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5294
5295#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5296 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5297
5298/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5299DECL_INLINE_THROW(uint32_t)
5300iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5301{
5302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5303 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5304 Assert(iGRegEx < 20);
5305
5306 if (iGRegEx < 16)
5307 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5308 else
5309 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5310
5311 /* If we've delayed writing back the register value, flush it now. */
5312 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5313
5314 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5315 if (!fConst)
5316 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5317
5318 return off;
5319}
5320
5321#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5322 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5323
5324#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5325 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5326
5327#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5328 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5329
5330#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5331 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5332
5333#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5334 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5335
5336#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5337 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5338
5339#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5340 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5341
5342#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5343 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5344
5345#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5346 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5347
5348#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5349 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5350
5351/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5352DECL_INLINE_THROW(uint32_t)
5353iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5354{
5355 Assert(iGReg < 16);
5356 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5357 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5358
5359 /* If we've delayed writing back the register value, flush it now. */
5360 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5361
5362 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5363 if (!fConst)
5364 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5365
5366 return off;
5367}
5368
5369
5370#undef IEM_MC_REF_EFLAGS /* should not be used. */
5371#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5372 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5373 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5374
5375/** Handles IEM_MC_REF_EFLAGS. */
5376DECL_INLINE_THROW(uint32_t)
5377iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5378{
5379 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5380 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5381
5382#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5383 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5384
5385 /* Updating the skipping according to the outputs is a little early, but
5386 we don't have any other hooks for references atm. */
5387 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5388 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5389 else if (fEflOutput & X86_EFL_STATUS_BITS)
5390 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5391 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5392#else
5393 RT_NOREF(fEflInput, fEflOutput);
5394#endif
5395
5396 /* If we've delayed writing back the register value, flush it now. */
5397 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5398
5399 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5400 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5401
5402 return off;
5403}
5404
5405
5406/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5407 * different code from the threaded recompiler, maybe it would be helpful. For now
5408 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5409#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5410
5411
5412#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5413 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5414
5415#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5416 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5417
5418#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5419 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5420
5421#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5422 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5423
5424#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5425/* Just being paranoid here. */
5426# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5427AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5428AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5429AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5430AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5431# endif
5432AssertCompileMemberOffset(X86XMMREG, au64, 0);
5433AssertCompileMemberOffset(X86XMMREG, au32, 0);
5434AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5435AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5436
5437# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5438 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5439# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5440 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5441# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5442 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5443# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5444 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5445#endif
5446
5447/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5448DECL_INLINE_THROW(uint32_t)
5449iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5450{
5451 Assert(iXReg < 16);
5452 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5453 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5454
5455 /* If we've delayed writing back the register value, flush it now. */
5456 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5457
5458#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5459 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5460 if (!fConst)
5461 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5462#else
5463 RT_NOREF(fConst);
5464#endif
5465
5466 return off;
5467}
5468
5469
5470
5471/*********************************************************************************************************************************
5472* Effective Address Calculation *
5473*********************************************************************************************************************************/
5474#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5475 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5476
5477/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5478 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5479DECL_INLINE_THROW(uint32_t)
5480iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5481 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5482{
5483 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5484
5485 /*
5486 * Handle the disp16 form with no registers first.
5487 *
5488 * Convert to an immediate value, as that'll delay the register allocation
5489 * and assignment till the memory access / call / whatever and we can use
5490 * a more appropriate register (or none at all).
5491 */
5492 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5493 {
5494 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5495 return off;
5496 }
5497
5498 /* Determine the displacement. */
5499 uint16_t u16EffAddr;
5500 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5501 {
5502 case 0: u16EffAddr = 0; break;
5503 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5504 case 2: u16EffAddr = u16Disp; break;
5505 default: AssertFailedStmt(u16EffAddr = 0);
5506 }
5507
5508 /* Determine the registers involved. */
5509 uint8_t idxGstRegBase;
5510 uint8_t idxGstRegIndex;
5511 switch (bRm & X86_MODRM_RM_MASK)
5512 {
5513 case 0:
5514 idxGstRegBase = X86_GREG_xBX;
5515 idxGstRegIndex = X86_GREG_xSI;
5516 break;
5517 case 1:
5518 idxGstRegBase = X86_GREG_xBX;
5519 idxGstRegIndex = X86_GREG_xDI;
5520 break;
5521 case 2:
5522 idxGstRegBase = X86_GREG_xBP;
5523 idxGstRegIndex = X86_GREG_xSI;
5524 break;
5525 case 3:
5526 idxGstRegBase = X86_GREG_xBP;
5527 idxGstRegIndex = X86_GREG_xDI;
5528 break;
5529 case 4:
5530 idxGstRegBase = X86_GREG_xSI;
5531 idxGstRegIndex = UINT8_MAX;
5532 break;
5533 case 5:
5534 idxGstRegBase = X86_GREG_xDI;
5535 idxGstRegIndex = UINT8_MAX;
5536 break;
5537 case 6:
5538 idxGstRegBase = X86_GREG_xBP;
5539 idxGstRegIndex = UINT8_MAX;
5540 break;
5541#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5542 default:
5543#endif
5544 case 7:
5545 idxGstRegBase = X86_GREG_xBX;
5546 idxGstRegIndex = UINT8_MAX;
5547 break;
5548 }
5549
5550 /*
5551 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5552 */
5553 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5554 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5555 kIemNativeGstRegUse_ReadOnly);
5556 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5557 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5558 kIemNativeGstRegUse_ReadOnly)
5559 : UINT8_MAX;
5560#ifdef RT_ARCH_AMD64
5561 if (idxRegIndex == UINT8_MAX)
5562 {
5563 if (u16EffAddr == 0)
5564 {
5565 /* movxz ret, base */
5566 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5567 }
5568 else
5569 {
5570 /* lea ret32, [base64 + disp32] */
5571 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5572 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5573 if (idxRegRet >= 8 || idxRegBase >= 8)
5574 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5575 pbCodeBuf[off++] = 0x8d;
5576 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5577 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5578 else
5579 {
5580 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5581 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5582 }
5583 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5584 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5585 pbCodeBuf[off++] = 0;
5586 pbCodeBuf[off++] = 0;
5587 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5588
5589 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5590 }
5591 }
5592 else
5593 {
5594 /* lea ret32, [index64 + base64 (+ disp32)] */
5595 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5596 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5597 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5598 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5599 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5600 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5601 pbCodeBuf[off++] = 0x8d;
5602 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5603 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5604 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5605 if (bMod == X86_MOD_MEM4)
5606 {
5607 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5608 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5609 pbCodeBuf[off++] = 0;
5610 pbCodeBuf[off++] = 0;
5611 }
5612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5613 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5614 }
5615
5616#elif defined(RT_ARCH_ARM64)
5617 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5618 if (u16EffAddr == 0)
5619 {
5620 if (idxRegIndex == UINT8_MAX)
5621 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5622 else
5623 {
5624 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5625 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5626 }
5627 }
5628 else
5629 {
5630 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5631 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5632 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5633 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5634 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5635 else
5636 {
5637 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5638 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5639 }
5640 if (idxRegIndex != UINT8_MAX)
5641 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5642 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5643 }
5644
5645#else
5646# error "port me"
5647#endif
5648
5649 if (idxRegIndex != UINT8_MAX)
5650 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5651 iemNativeRegFreeTmp(pReNative, idxRegBase);
5652 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5653 return off;
5654}
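
/* Illustrative note (editor's sketch, not part of the recompiler): the base and
 * index pairs selected above follow the classic 16-bit ModR/M table, i.e.
 * rm=0..7 -> BX+SI, BX+DI, BP+SI, BP+DI, SI, DI, BP (or disp16 when mod=0), BX.
 * A standalone restatement in plain C; the function and parameter names are
 * invented for the example.
 */
#if 0
# include <stdint.h>
static uint16_t iemExampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp,
                                        uint16_t uBx, uint16_t uBp, uint16_t uSi, uint16_t uDi)
{
    uint8_t const iMod = bRm >> 6;
    uint8_t const iRm  = bRm & 7;
    if (iMod == 0 && iRm == 6)
        return u16Disp;                                     /* mod=0, rm=6: disp16 only, no registers. */

    uint16_t uEff = iMod == 1 ? (uint16_t)(int8_t)u16Disp   /* disp8, sign extended */
                  : iMod == 2 ? u16Disp : 0;                /* disp16 or no displacement */
    static uint8_t const s_afBase[8]  = { 1, 1, 2, 2, 0, 0, 2, 1 };  /* 1=BX, 2=BP */
    static uint8_t const s_afIndex[8] = { 3, 4, 3, 4, 3, 4, 0, 0 };  /* 3=SI, 4=DI */
    uEff += s_afBase[iRm]  == 1 ? uBx : s_afBase[iRm]  == 2 ? uBp : 0;
    uEff += s_afIndex[iRm] == 3 ? uSi : s_afIndex[iRm] == 4 ? uDi : 0;
    return uEff;                                            /* 16-bit wrap-around is implicit in the return type. */
}
#endif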
5655
5656
5657#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5658 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5659
5660/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5661 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5662DECL_INLINE_THROW(uint32_t)
5663iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5664 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5665{
5666 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5667
5668 /*
5669 * Handle the disp32 form with no registers first.
5670 *
5671 * Convert to an immediate value, as that'll delay the register allocation
5672 * and assignment till the memory access / call / whatever and we can use
5673 * a more appropriate register (or none at all).
5674 */
5675 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5676 {
5677 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5678 return off;
5679 }
5680
5681 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
5682 uint32_t u32EffAddr = 0;
5683 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5684 {
5685 case 0: break;
5686 case 1: u32EffAddr = (int8_t)u32Disp; break;
5687 case 2: u32EffAddr = u32Disp; break;
5688 default: AssertFailed();
5689 }
5690
5691 /* Get the register (or SIB) value. */
5692 uint8_t idxGstRegBase = UINT8_MAX;
5693 uint8_t idxGstRegIndex = UINT8_MAX;
5694 uint8_t cShiftIndex = 0;
5695 switch (bRm & X86_MODRM_RM_MASK)
5696 {
5697 case 0: idxGstRegBase = X86_GREG_xAX; break;
5698 case 1: idxGstRegBase = X86_GREG_xCX; break;
5699 case 2: idxGstRegBase = X86_GREG_xDX; break;
5700 case 3: idxGstRegBase = X86_GREG_xBX; break;
5701 case 4: /* SIB */
5702 {
5703 /* index w/ scaling. */
5704 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5705 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5706 {
5707 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5708 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5709 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5710 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5711 case 4: cShiftIndex = 0; /*no index*/ break;
5712 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5713 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5714 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5715 }
5716
5717 /* base */
5718 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5719 {
5720 case 0: idxGstRegBase = X86_GREG_xAX; break;
5721 case 1: idxGstRegBase = X86_GREG_xCX; break;
5722 case 2: idxGstRegBase = X86_GREG_xDX; break;
5723 case 3: idxGstRegBase = X86_GREG_xBX; break;
5724 case 4:
5725 idxGstRegBase = X86_GREG_xSP;
5726 u32EffAddr += uSibAndRspOffset >> 8;
5727 break;
5728 case 5:
5729 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5730 idxGstRegBase = X86_GREG_xBP;
5731 else
5732 {
5733 Assert(u32EffAddr == 0);
5734 u32EffAddr = u32Disp;
5735 }
5736 break;
5737 case 6: idxGstRegBase = X86_GREG_xSI; break;
5738 case 7: idxGstRegBase = X86_GREG_xDI; break;
5739 }
5740 break;
5741 }
5742 case 5: idxGstRegBase = X86_GREG_xBP; break;
5743 case 6: idxGstRegBase = X86_GREG_xSI; break;
5744 case 7: idxGstRegBase = X86_GREG_xDI; break;
5745 }
5746
5747 /*
5748 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5749 * the start of the function.
5750 */
5751 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5752 {
5753 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5754 return off;
5755 }
5756
5757 /*
5758 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5759 */
5760 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5761 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5762 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5763 kIemNativeGstRegUse_ReadOnly);
5764 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5765 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5766 kIemNativeGstRegUse_ReadOnly);
5767
5768 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5769 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5770 {
5771 idxRegBase = idxRegIndex;
5772 idxRegIndex = UINT8_MAX;
5773 }
5774
5775#ifdef RT_ARCH_AMD64
5776 if (idxRegIndex == UINT8_MAX)
5777 {
5778 if (u32EffAddr == 0)
5779 {
5780 /* mov ret, base */
5781 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5782 }
5783 else
5784 {
5785 /* lea ret32, [base64 + disp32] */
5786 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5787 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5788 if (idxRegRet >= 8 || idxRegBase >= 8)
5789 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5790 pbCodeBuf[off++] = 0x8d;
5791 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5792 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5793 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5794 else
5795 {
5796 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5797 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5798 }
5799 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5800 if (bMod == X86_MOD_MEM4)
5801 {
5802 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5803 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5804 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5805 }
5806 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5807 }
5808 }
5809 else
5810 {
5811 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5812 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5813 if (idxRegBase == UINT8_MAX)
5814 {
5815 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5816 if (idxRegRet >= 8 || idxRegIndex >= 8)
5817 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5818 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5819 pbCodeBuf[off++] = 0x8d;
5820 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5821 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5822 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5823 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5824 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5825 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5826 }
5827 else
5828 {
5829 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5830 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5831 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5832 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5833 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5834 pbCodeBuf[off++] = 0x8d;
5835 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5836 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5837 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5838 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5839 if (bMod != X86_MOD_MEM0)
5840 {
5841 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5842 if (bMod == X86_MOD_MEM4)
5843 {
5844 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5845 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5846 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5847 }
5848 }
5849 }
5850 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5851 }
5852
5853#elif defined(RT_ARCH_ARM64)
5854 if (u32EffAddr == 0)
5855 {
5856 if (idxRegIndex == UINT8_MAX)
5857 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5858 else if (idxRegBase == UINT8_MAX)
5859 {
5860 if (cShiftIndex == 0)
5861 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5862 else
5863 {
5864 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5865 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5866 }
5867 }
5868 else
5869 {
5870 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5871 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5872 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5873 }
5874 }
5875 else
5876 {
5877 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5878 {
5879 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5880 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5881 }
5882 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5883 {
5884 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5885 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5886 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5887 }
5888 else
5889 {
5890 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5891 if (idxRegBase != UINT8_MAX)
5892 {
5893 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5894 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5895 }
5896 }
5897 if (idxRegIndex != UINT8_MAX)
5898 {
5899 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5900 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5901 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5902 }
5903 }
5904
5905#else
5906# error "port me"
5907#endif
5908
5909 if (idxRegIndex != UINT8_MAX)
5910 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5911 if (idxRegBase != UINT8_MAX)
5912 iemNativeRegFreeTmp(pReNative, idxRegBase);
5913 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5914 return off;
5915}
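
/* Illustrative note (editor's sketch, not part of the recompiler): a concrete
 * decode of the SIB handling above. For uSibAndRspOffset = 0x000000d9 the SIB
 * byte is 0xd9: scale=3, index=3 (EBX), base=1 (ECX), so the emitted code ends
 * up computing ECX + (EBX << 3) with no displacement (mod=0, base!=5). A plain
 * C check of that field extraction; the function name is invented for the
 * example.
 */
#if 0
# include <stdint.h>
# include <assert.h>
static void iemExampleDecodeSib(void)
{
    uint32_t const uSibAndRspOffset = UINT32_C(0x000000d9);   /* SIB byte in bits 0..7, no RSP offset. */
    uint8_t  const bSib             = (uint8_t)uSibAndRspOffset;
    assert((bSib >> 6)       == 3);  /* scale -> cShiftIndex = 3, i.e. index * 8 */
    assert(((bSib >> 3) & 7) == 3);  /* index -> EBX */
    assert((bSib & 7)        == 1);  /* base  -> ECX */
}
#endif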
5916
5917
5918#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5919 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5920 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5921
5922#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5923 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5924 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5925
5926#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5927 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5928 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5929
5930/**
5931 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5932 *
5933 * @returns New off.
5934 * @param pReNative The native recompile state.
5935 * @param off The current code buffer offset.
5936 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5937 * bit 4 to REX.X. The two bits are part of the
5938 * REG sub-field, which isn't needed in this
5939 * function.
5940 * @param uSibAndRspOffset Two parts:
5941 * - The first 8 bits make up the SIB byte.
5942 * - The next 8 bits are the fixed RSP/ESP offset
5943 * in case of a pop [xSP].
5944 * @param u32Disp The displacement byte/word/dword, if any.
5945 * @param cbInstr The size of the fully decoded instruction. Used
5946 * for RIP relative addressing.
5947 * @param idxVarRet The result variable number.
5948 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5949 * when calculating the address.
5950 *
5951 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5952 */
5953DECL_INLINE_THROW(uint32_t)
5954iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5955 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5956{
5957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5958
5959 /*
5960 * Special case the rip + disp32 form first.
5961 */
5962 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5963 {
5964#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5965 /* Need to take the current PC offset into account for the displacement; no need to flush here
5966 * as the PC is only read and there is no branching or calling of helpers involved. */
5967 u32Disp += pReNative->Core.offPc;
5968#endif
5969
5970 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5971 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5972 kIemNativeGstRegUse_ReadOnly);
5973#ifdef RT_ARCH_AMD64
5974 if (f64Bit)
5975 {
5976 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5977 if ((int32_t)offFinalDisp == offFinalDisp)
5978 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5979 else
5980 {
5981 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5982 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5983 }
5984 }
5985 else
5986 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5987
5988#elif defined(RT_ARCH_ARM64)
5989 if (f64Bit)
5990 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5991 (int64_t)(int32_t)u32Disp + cbInstr);
5992 else
5993 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5994 (int32_t)u32Disp + cbInstr);
5995
5996#else
5997# error "Port me!"
5998#endif
5999 iemNativeRegFreeTmp(pReNative, idxRegPc);
6000 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6001 return off;
6002 }
6003
6004 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
6005 int64_t i64EffAddr = 0;
6006 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6007 {
6008 case 0: break;
6009 case 1: i64EffAddr = (int8_t)u32Disp; break;
6010 case 2: i64EffAddr = (int32_t)u32Disp; break;
6011 default: AssertFailed();
6012 }
6013
6014 /* Get the register (or SIB) value. */
6015 uint8_t idxGstRegBase = UINT8_MAX;
6016 uint8_t idxGstRegIndex = UINT8_MAX;
6017 uint8_t cShiftIndex = 0;
6018 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6019 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6020 else /* SIB: */
6021 {
6022 /* index w/ scaling. */
6023 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6024 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6025 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6026 if (idxGstRegIndex == 4)
6027 {
6028 /* no index */
6029 cShiftIndex = 0;
6030 idxGstRegIndex = UINT8_MAX;
6031 }
6032
6033 /* base */
6034 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6035 if (idxGstRegBase == 4)
6036 {
6037 /* pop [rsp] hack */
6038 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6039 }
6040 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6041 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6042 {
6043 /* mod=0 and base=5 -> disp32, no base reg. */
6044 Assert(i64EffAddr == 0);
6045 i64EffAddr = (int32_t)u32Disp;
6046 idxGstRegBase = UINT8_MAX;
6047 }
6048 }
6049
6050 /*
6051 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6052 * the start of the function.
6053 */
6054 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6055 {
6056 if (f64Bit)
6057 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6058 else
6059 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6060 return off;
6061 }
6062
6063 /*
6064 * Now emit code that calculates:
6065 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6066 * or if !f64Bit:
6067 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6068 */
6069 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6070 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6071 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6072 kIemNativeGstRegUse_ReadOnly);
6073 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6074 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6075 kIemNativeGstRegUse_ReadOnly);
6076
6077 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6078 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6079 {
6080 idxRegBase = idxRegIndex;
6081 idxRegIndex = UINT8_MAX;
6082 }
6083
6084#ifdef RT_ARCH_AMD64
6085 uint8_t bFinalAdj;
6086 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6087 bFinalAdj = 0; /* likely */
6088 else
6089 {
6090 /* pop [rsp] with a problematic disp32 value. Split out the
6091 RSP offset and add it separately afterwards (bFinalAdj). */
6092 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6093 Assert(idxGstRegBase == X86_GREG_xSP);
6094 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6095 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6096 Assert(bFinalAdj != 0);
6097 i64EffAddr -= bFinalAdj;
6098 Assert((int32_t)i64EffAddr == i64EffAddr);
6099 }
6100 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6101//pReNative->pInstrBuf[off++] = 0xcc;
6102
6103 if (idxRegIndex == UINT8_MAX)
6104 {
6105 if (u32EffAddr == 0)
6106 {
6107 /* mov ret, base */
6108 if (f64Bit)
6109 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6110 else
6111 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6112 }
6113 else
6114 {
6115 /* lea ret, [base + disp32] */
6116 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6117 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6118 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6119 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6120 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6121 | (f64Bit ? X86_OP_REX_W : 0);
6122 pbCodeBuf[off++] = 0x8d;
6123 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6124 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6125 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6126 else
6127 {
6128 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6129 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6130 }
6131 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6132 if (bMod == X86_MOD_MEM4)
6133 {
6134 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6135 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6136 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6137 }
6138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6139 }
6140 }
6141 else
6142 {
6143 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6144 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6145 if (idxRegBase == UINT8_MAX)
6146 {
6147 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6148 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6149 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6150 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6151 | (f64Bit ? X86_OP_REX_W : 0);
6152 pbCodeBuf[off++] = 0x8d;
6153 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6154 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6155 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6156 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6157 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6158 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6159 }
6160 else
6161 {
6162 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6163 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6164 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6165 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6166 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6167 | (f64Bit ? X86_OP_REX_W : 0);
6168 pbCodeBuf[off++] = 0x8d;
6169 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6170 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6171 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6172 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6173 if (bMod != X86_MOD_MEM0)
6174 {
6175 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6176 if (bMod == X86_MOD_MEM4)
6177 {
6178 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6179 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6180 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6181 }
6182 }
6183 }
6184 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6185 }
6186
6187 if (!bFinalAdj)
6188 { /* likely */ }
6189 else
6190 {
6191 Assert(f64Bit);
6192 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6193 }
6194
6195#elif defined(RT_ARCH_ARM64)
6196 if (i64EffAddr == 0)
6197 {
6198 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6199 if (idxRegIndex == UINT8_MAX)
6200 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6201 else if (idxRegBase != UINT8_MAX)
6202 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6203 f64Bit, false /*fSetFlags*/, cShiftIndex);
6204 else
6205 {
6206 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6207 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6208 }
6209 }
6210 else
6211 {
6212 if (f64Bit)
6213 { /* likely */ }
6214 else
6215 i64EffAddr = (int32_t)i64EffAddr;
6216
6217 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6218 {
6219 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6220 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6221 }
6222 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6223 {
6224 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6225 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6226 }
6227 else
6228 {
6229 if (f64Bit)
6230 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6231 else
6232 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6233 if (idxRegBase != UINT8_MAX)
6234 {
6235 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6236 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6237 }
6238 }
6239 if (idxRegIndex != UINT8_MAX)
6240 {
6241 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6242 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6243 f64Bit, false /*fSetFlags*/, cShiftIndex);
6244 }
6245 }
6246
6247#else
6248# error "port me"
6249#endif
6250
6251 if (idxRegIndex != UINT8_MAX)
6252 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6253 if (idxRegBase != UINT8_MAX)
6254 iemNativeRegFreeTmp(pReNative, idxRegBase);
6255 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6256 return off;
6257}
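
/* Illustrative note (editor's sketch, not part of the recompiler): for the
 * "pop [rsp]" case above, the second byte of uSibAndRspOffset carries the stack
 * adjustment that architecturally takes effect before the memory operand's
 * address is evaluated. E.g. for pop qword [rsp+0x10] (ModRM 0x44, SIB 0x24,
 * disp8 0x10) the caller is assumed to pass an offset of 8 in bits 8..15, and
 * the code above folds it into i64EffAddr so the final address becomes
 * RSP + 0x10 + 8. A plain C restatement of that folding; the names and the
 * offset value are assumptions made for the example.
 */
#if 0
# include <stdint.h>
# include <assert.h>
static void iemExamplePopRspOffsetFolding(void)
{
    uint32_t const uSibAndRspOffset = UINT32_C(0x24) | (UINT32_C(8) << 8);  /* SIB 0x24, +8 RSP offset. */
    int64_t        i64EffAddr       = 0x10;                                 /* disp8 */
    if ((uSibAndRspOffset & 7) == 4 /* base = RSP */)
        i64EffAddr += uSibAndRspOffset >> 8;                                /* the pop [rsp] hack */
    assert(i64EffAddr == 0x18);
}
#endif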
6258
6259
6260/*********************************************************************************************************************************
6261* Memory fetches and stores common *
6262*********************************************************************************************************************************/
6263
6264typedef enum IEMNATIVEMITMEMOP
6265{
6266 kIemNativeEmitMemOp_Store = 0,
6267 kIemNativeEmitMemOp_Fetch,
6268 kIemNativeEmitMemOp_Fetch_Zx_U16,
6269 kIemNativeEmitMemOp_Fetch_Zx_U32,
6270 kIemNativeEmitMemOp_Fetch_Zx_U64,
6271 kIemNativeEmitMemOp_Fetch_Sx_U16,
6272 kIemNativeEmitMemOp_Fetch_Sx_U32,
6273 kIemNativeEmitMemOp_Fetch_Sx_U64
6274} IEMNATIVEMITMEMOP;
6275
6276/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6277 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6278 * (with iSegReg = UINT8_MAX). */
6279DECL_INLINE_THROW(uint32_t)
6280iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6281 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6282 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6283{
6284 /*
6285 * Assert sanity.
6286 */
6287 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6288 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6289 Assert( enmOp != kIemNativeEmitMemOp_Store
6290 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6291 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6292 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6293 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6294 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6295 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6296 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6297 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6298#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6299 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6300 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6301#else
6302 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6303#endif
6304 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6305 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6306#ifdef VBOX_STRICT
6307 if (iSegReg == UINT8_MAX)
6308 {
6309 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6310 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6311 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6312 switch (cbMem)
6313 {
6314 case 1:
6315 Assert( pfnFunction
6316 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6317 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6318 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6319 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6320 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6321 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6322 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6323 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6324 : UINT64_C(0xc000b000a0009000) ));
6325 Assert(!fAlignMaskAndCtl);
6326 break;
6327 case 2:
6328 Assert( pfnFunction
6329 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6330 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6331 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6332 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6333 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6334 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6335 : UINT64_C(0xc000b000a0009000) ));
6336 Assert(fAlignMaskAndCtl <= 1);
6337 break;
6338 case 4:
6339 Assert( pfnFunction
6340 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6341 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6342 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6343 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6344 : UINT64_C(0xc000b000a0009000) ));
6345 Assert(fAlignMaskAndCtl <= 3);
6346 break;
6347 case 8:
6348 Assert( pfnFunction
6349 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6350 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6351 : UINT64_C(0xc000b000a0009000) ));
6352 Assert(fAlignMaskAndCtl <= 7);
6353 break;
6354#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6355 case sizeof(RTUINT128U):
6356 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6357 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6358 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6359 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6360 || ( enmOp == kIemNativeEmitMemOp_Store
6361 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6362 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6363 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6364 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6365 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6366 : fAlignMaskAndCtl <= 15);
6367 break;
6368 case sizeof(RTUINT256U):
6369 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6370 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6371 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6372 || ( enmOp == kIemNativeEmitMemOp_Store
6373 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6374 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6375 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6376 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6377 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6378 : fAlignMaskAndCtl <= 31);
6379 break;
6380#endif
6381 }
6382 }
6383 else
6384 {
6385 Assert(iSegReg < 6);
6386 switch (cbMem)
6387 {
6388 case 1:
6389 Assert( pfnFunction
6390 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6391 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6392 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6393 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6394 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6395 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6396 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6397 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6398 : UINT64_C(0xc000b000a0009000) ));
6399 Assert(!fAlignMaskAndCtl);
6400 break;
6401 case 2:
6402 Assert( pfnFunction
6403 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6404 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6405 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6406 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6407 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6408 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6409 : UINT64_C(0xc000b000a0009000) ));
6410 Assert(fAlignMaskAndCtl <= 1);
6411 break;
6412 case 4:
6413 Assert( pfnFunction
6414 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6415 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6416 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6417 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6418 : UINT64_C(0xc000b000a0009000) ));
6419 Assert(fAlignMaskAndCtl <= 3);
6420 break;
6421 case 8:
6422 Assert( pfnFunction
6423 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6424 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6425 : UINT64_C(0xc000b000a0009000) ));
6426 Assert(fAlignMaskAndCtl <= 7);
6427 break;
6428#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6429 case sizeof(RTUINT128U):
6430 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6431 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6432 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6433 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6434 || ( enmOp == kIemNativeEmitMemOp_Store
6435 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6436 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6437 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6438 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6439 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6440 : fAlignMaskAndCtl <= 15);
6441 break;
6442 case sizeof(RTUINT256U):
6443 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6444 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6445 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6446 || ( enmOp == kIemNativeEmitMemOp_Store
6447 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6448 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6449 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6450 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6451 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6452 : fAlignMaskAndCtl <= 31);
6453 break;
6454#endif
6455 }
6456 }
6457#endif
6458
6459#ifdef VBOX_STRICT
6460 /*
6461 * Check that the fExec flags we've got make sense.
6462 */
6463 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6464#endif
6465
6466 /*
6467 * To keep things simple we have to commit any pending writes first as we
6468 * may end up making calls.
6469 */
6470 /** @todo we could postpone this till we make the call and reload the
6471 * registers after returning from the call. Not sure if that's sensible or
6472 * not, though. */
6473#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6474 off = iemNativeRegFlushPendingWrites(pReNative, off);
6475#else
6476 /* The program counter is treated differently for now. */
6477 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6478#endif
6479
6480#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6481 /*
6482 * Move/spill/flush stuff out of call-volatile registers.
6483 * This is the easy way out. We could contain this to the tlb-miss branch
6484 * by saving and restoring active stuff here.
6485 */
6486 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6487#endif
6488
6489 /*
6490 * Define labels and allocate the result register (trying for the return
6491 * register if we can).
6492 */
6493 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6494#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6495 uint8_t idxRegValueFetch = UINT8_MAX;
6496
6497 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6498 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6499 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6500 else
6501 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6502 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6503 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6504 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6505#else
6506 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6507 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6508 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6509 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6510#endif
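    /* Informal note: for fetches we prefer IEMNATIVE_CALL_RET_GREG for the destination variable
       when it is free, so the value returned by the TLB-miss helper already lands in the right
       register and the move after the call further down becomes unnecessary. */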
6511 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6512
6513#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6514 uint8_t idxRegValueStore = UINT8_MAX;
6515
6516 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6517 idxRegValueStore = !TlbState.fSkip
6518 && enmOp == kIemNativeEmitMemOp_Store
6519 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6520 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6521 : UINT8_MAX;
6522 else
6523 idxRegValueStore = !TlbState.fSkip
6524 && enmOp == kIemNativeEmitMemOp_Store
6525 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6526 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6527 : UINT8_MAX;
6528
6529#else
6530 uint8_t const idxRegValueStore = !TlbState.fSkip
6531 && enmOp == kIemNativeEmitMemOp_Store
6532 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6533 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6534 : UINT8_MAX;
6535#endif
6536 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6537 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6538 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6539 : UINT32_MAX;
6540
6541 /*
6542 * Jump to the TLB lookup code.
6543 */
6544 if (!TlbState.fSkip)
6545 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
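    /*
     * Informal sketch of the code emitted from here on when the TLB lookup isn't skipped
     * (the lookup itself is produced by iemNativeEmitTlbLookup further down):
     *
     *          jmp     TlbLookup           ; emitted just above
     *      TlbMiss:
     *          <call pfnFunction>          ; slow path, helper performs the access
     *          jmp     TlbDone
     *      TlbLookup:
     *          <tlb lookup>                ; jumps to TlbMiss on miss, otherwise falls through
     *          <inline load/store>         ; uses the host address left in idxRegMemResult
     *      TlbDone:
     */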
6546
6547 /*
6548 * TlbMiss:
6549 *
6550 * Call helper to do the fetching.
6551 * We flush all guest register shadow copies here.
6552 */
6553 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6554
6555#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6556 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6557#else
6558 RT_NOREF(idxInstr);
6559#endif
6560
6561#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6562 if (pReNative->Core.offPc)
6563 {
6564 /*
6565         * Update the program counter but restore it at the end of the TlbMiss branch.
6566         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
6567         * which are hopefully much more frequent, reducing the number of memory accesses.
6568 */
6569 /* Allocate a temporary PC register. */
6570 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6571
6572 /* Perform the addition and store the result. */
6573 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6574 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6575
6576 /* Free and flush the PC register. */
6577 iemNativeRegFreeTmp(pReNative, idxPcReg);
6578 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6579 }
6580#endif
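    /* Informal note: the matching restore (subtracting Core.offPc again) is emitted after the
       helper call below, so cpum.GstCtx.rip is only kept up to date for the duration of the
       TLB-miss helper call. */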
6581
6582#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6583 /* Save variables in volatile registers. */
6584 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6585 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6586 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6587 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6588#endif
6589
6590 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6591 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6592#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6593 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6594 {
6595 /*
6596 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6597 *
6598         * @note A host SIMD register was assigned to the variable for the TlbLookup case above; it
6599         * must not be freed here, or the value placed in the stack slot will not be synced back into
6600         * that register further down the road, because the variable would no longer know it has a register assigned.
6601 *
6602 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6603 * as it will be overwritten anyway.
6604 */
6605 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6606 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6607 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6608 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6609 }
6610 else
6611#endif
6612 if (enmOp == kIemNativeEmitMemOp_Store)
6613 {
6614 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6615 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6616#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6617 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6618#else
6619 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6620 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6621#endif
6622 }
6623
6624 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6625 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6626#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6627 fVolGregMask);
6628#else
6629 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6630#endif
6631
6632 if (iSegReg != UINT8_MAX)
6633 {
6634 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6635 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6636 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6637 }
6638
6639 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6640 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6641
6642 /* Done setting up parameters, make the call. */
6643 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6644
6645 /*
6646 * Put the result in the right register if this is a fetch.
6647 */
6648 if (enmOp != kIemNativeEmitMemOp_Store)
6649 {
6650#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6651 if ( cbMem == sizeof(RTUINT128U)
6652 || cbMem == sizeof(RTUINT256U))
6653 {
6654 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6655
6656 /* Sync the value on the stack with the host register assigned to the variable. */
6657 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6658 }
6659 else
6660#endif
6661 {
6662 Assert(idxRegValueFetch == pVarValue->idxReg);
6663 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6664 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6665 }
6666 }
6667
6668#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6669 /* Restore variables and guest shadow registers to volatile registers. */
6670 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6671 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6672#endif
6673
6674#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6675 if (pReNative->Core.offPc)
6676 {
6677 /*
6678 * Time to restore the program counter to its original value.
6679 */
6680 /* Allocate a temporary PC register. */
6681 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6682
6683 /* Restore the original value. */
6684 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6685 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6686
6687 /* Free and flush the PC register. */
6688 iemNativeRegFreeTmp(pReNative, idxPcReg);
6689 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6690 }
6691#endif
6692
6693#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6694 if (!TlbState.fSkip)
6695 {
6696 /* end of TlbMiss - Jump to the done label. */
6697 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6698 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6699
6700 /*
6701 * TlbLookup:
6702 */
6703 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6704 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6705 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6706
6707 /*
6708 * Emit code to do the actual storing / fetching.
6709 */
6710 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6711# ifdef IEM_WITH_TLB_STATISTICS
6712 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6713 enmOp == kIemNativeEmitMemOp_Store
6714                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6715                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6716# endif
6717 switch (enmOp)
6718 {
6719 case kIemNativeEmitMemOp_Store:
6720 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6721 {
6722 switch (cbMem)
6723 {
6724 case 1:
6725 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6726 break;
6727 case 2:
6728 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6729 break;
6730 case 4:
6731 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6732 break;
6733 case 8:
6734 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6735 break;
6736#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6737 case sizeof(RTUINT128U):
6738 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6739 break;
6740 case sizeof(RTUINT256U):
6741 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6742 break;
6743#endif
6744 default:
6745 AssertFailed();
6746 }
6747 }
6748 else
6749 {
6750 switch (cbMem)
6751 {
6752 case 1:
6753 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6754 idxRegMemResult, TlbState.idxReg1);
6755 break;
6756 case 2:
6757 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6758 idxRegMemResult, TlbState.idxReg1);
6759 break;
6760 case 4:
6761 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6762 idxRegMemResult, TlbState.idxReg1);
6763 break;
6764 case 8:
6765 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6766 idxRegMemResult, TlbState.idxReg1);
6767 break;
6768 default:
6769 AssertFailed();
6770 }
6771 }
6772 break;
6773
6774 case kIemNativeEmitMemOp_Fetch:
6775 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6776 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6777 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6778 switch (cbMem)
6779 {
6780 case 1:
6781 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6782 break;
6783 case 2:
6784 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6785 break;
6786 case 4:
6787 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6788 break;
6789 case 8:
6790 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6791 break;
6792#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6793 case sizeof(RTUINT128U):
6794 /*
6795 * No need to sync back the register with the stack, this is done by the generic variable handling
6796 * code if there is a register assigned to a variable and the stack must be accessed.
6797 */
6798 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6799 break;
6800 case sizeof(RTUINT256U):
6801 /*
6802 * No need to sync back the register with the stack, this is done by the generic variable handling
6803 * code if there is a register assigned to a variable and the stack must be accessed.
6804 */
6805 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6806 break;
6807#endif
6808 default:
6809 AssertFailed();
6810 }
6811 break;
6812
6813 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6814 Assert(cbMem == 1);
6815 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6816 break;
6817
6818 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6819 Assert(cbMem == 1 || cbMem == 2);
6820 if (cbMem == 1)
6821 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6822 else
6823 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6824 break;
6825
6826 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6827 switch (cbMem)
6828 {
6829 case 1:
6830 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6831 break;
6832 case 2:
6833 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6834 break;
6835 case 4:
6836 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6837 break;
6838 default:
6839 AssertFailed();
6840 }
6841 break;
6842
6843 default:
6844 AssertFailed();
6845 }
6846
6847 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6848
6849 /*
6850 * TlbDone:
6851 */
6852 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6853
6854 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6855
6856# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6857 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6858 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6859# endif
6860 }
6861#else
6862 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6863#endif
6864
6865 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6866 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6867 return off;
6868}
6869
6870
6871
6872/*********************************************************************************************************************************
6873* Memory fetches (IEM_MEM_FETCH_XXX). *
6874*********************************************************************************************************************************/
6875
6876/* 8-bit segmented: */
6877#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6878 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6879 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6880 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6881
6882#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6883 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6884 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6885 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6886
6887#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6888 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6889 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6890 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6891
6892#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6894 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6895 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6896
6897#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6898 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6899 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6900 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6901
6902#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6903 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6904 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6905 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6906
6907#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6908 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6909 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6910 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6911
6912/* 16-bit segmented: */
6913#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6915 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6916 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6917
6918#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6919 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6920 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6921 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6922
6923#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6924 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6925 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6926 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6927
6928#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6929 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6930 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6931 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6932
6933#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6935 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6936 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6937
6938#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6939 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6940 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6941 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6942
6943
6944/* 32-bit segmented: */
6945#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6946 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6947 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6948 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6949
6950#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6951 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6952 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6953 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6954
6955#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6956 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6957 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6958 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6959
6960#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6961 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6962 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6963 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6964
6965#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6966 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6967 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6968 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6969
6970#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6971 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6972 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6973 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6974
6975#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6976 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6977 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6978 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6979
6980AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6981#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6982 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6983 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6984 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6985
6986
6987/* 64-bit segmented: */
6988#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6989 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6990 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6991 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6992
6993AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
6994#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
6995 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
6996 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6997 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6998
6999
7000/* 8-bit flat: */
7001#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7003 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7004 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7005
7006#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7007 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7008 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7009 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7010
7011#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7012 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7013 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7014 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7015
7016#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7017 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7018 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7019 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7020
7021#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7022 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7023 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7024 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7025
7026#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7027 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7028 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7029 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7030
7031#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7033 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7034 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7035
7036
7037/* 16-bit flat: */
7038#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7039 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7040 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7041 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7042
7043#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7045 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7046 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7047
7048#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7049 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7050 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7051 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7052
7053#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7054 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7055 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7056 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7057
7058#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7059 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7060 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7061 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7062
7063#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7064 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7065 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7066 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7067
7068/* 32-bit flat: */
7069#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7071 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7072 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7073
7074#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7076 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7077 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7078
7079#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7080 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7081 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7082 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7083
7084#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7085 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7086 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7087 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7088
7089#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7090 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7091 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7092 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7093
7094#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7095 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7096 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7097 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7098
7099#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7100 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7101 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7102 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7103
7104#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7105 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7106 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7107 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7108
7109
7110/* 64-bit flat: */
7111#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7112 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7113 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7114 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7115
7116#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7117 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7118 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7119 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7120
7121#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7122/* 128-bit segmented: */
7123#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7125 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7126 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7127
7128#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7129 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7130 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7131 kIemNativeEmitMemOp_Fetch, \
7132 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7133
7134AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7135#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7136 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7137 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7138 kIemNativeEmitMemOp_Fetch, \
7139 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7140
7141#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7143 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7144 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7145
7146/* 128-bit flat: */
7147#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7148 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7149 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7150 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7151
7152#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7154 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7155 kIemNativeEmitMemOp_Fetch, \
7156 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7157
7158#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7159 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7160 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7161 kIemNativeEmitMemOp_Fetch, \
7162 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7163
7164#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7165 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7166 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7167 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7168
7169#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7170 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7171 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7172 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7173
7174/* 256-bit segmented: */
7175#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7176 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7177 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7178 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7179
7180#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7181 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7182 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7183 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7184
7185#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7186 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7187 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7188 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7189
7190
7191/* 256-bit flat: */
7192#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7193 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7194 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7195 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7196
7197#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7198 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7199 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7200 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7201
7202#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7203 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7204 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7205 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7206
7207#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7208 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7209 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7210 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7211
7212#endif
7213
7214
7215/*********************************************************************************************************************************
7216* Memory stores (IEM_MEM_STORE_XXX). *
7217*********************************************************************************************************************************/
7218
7219#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7220 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7221 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7222 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7223
7224#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7225 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7226 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7227 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7228
7229#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7230 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7231 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7232 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7233
7234#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7235 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7236 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7237 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7238
7239
7240#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7241 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7242 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7243 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7244
7245#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7246 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7247 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7248 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7249
7250#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7251 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7252 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7253 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7254
7255#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7256 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7257 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7258 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7259
7260
7261#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7262 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7263 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7264
7265#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7266 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7267 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7268
7269#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7270 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7271 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7272
7273#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7274 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7275 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7276
7277
7278#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7279 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7280 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7281
7282#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7283 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7284 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7285
7286#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7287 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7288 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7289
7290#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7291 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7292 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7293
7294/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7295 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7296DECL_INLINE_THROW(uint32_t)
7297iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7298 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7299{
7300 /*
7301 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7302 * to do the grunt work.
7303 */
7304 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7305 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7306 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7307 pfnFunction, idxInstr);
7308 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7309 return off;
7310}
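
/*
 * Informal note: e.g. IEM_MC_STORE_MEM_U16_CONST above lands here with cbMem = 2 and
 * iemNativeHlpMemStoreDataU16 as pfnFunction; the temporary constant variable created here is
 * then handled by the immediate/stack store paths of the common fetch/store worker further up.
 */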
7311
7312
7313#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7314# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7315 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7316 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7317 kIemNativeEmitMemOp_Store, \
7318 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7319
7320# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7321 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7322 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7323 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7324
7325# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7326 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7327 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7328 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7329
7330# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7331 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7332 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7333 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7334
7335
7336# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7337 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7338 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7339 kIemNativeEmitMemOp_Store, \
7340 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7341
7342# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7343 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7344 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7345 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7346
7347# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7348 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7349 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7350 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7351
7352# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7353 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7354 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7355 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7356#endif
7357
7358
7359
7360/*********************************************************************************************************************************
7361* Stack Accesses. *
7362*********************************************************************************************************************************/
7363/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
7364#define IEM_MC_PUSH_U16(a_u16Value) \
7365 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7366 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7367#define IEM_MC_PUSH_U32(a_u32Value) \
7368 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7369 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7370#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7371 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7372 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7373#define IEM_MC_PUSH_U64(a_u64Value) \
7374 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7375 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7376
7377#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7378 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7379 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7380#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7381 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7382 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7383#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7384 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7385 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7386
7387#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7388 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7389 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7390#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7391 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7392 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
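
/*
 * Informal note: the RT_MAKE_U32_FROM_U8 value packed into the macros above is decoded again by
 * iemNativeEmitStackPush below: the first argument is the width in bits of the value being pushed
 * (cbMem via RT_BYTE1), the second the flat stack-pointer width in bits (0 = segmented stack,
 * via RT_BYTE2), and the third the segment-register flag (via RT_BYTE3).  For example,
 * RT_MAKE_U32_FROM_U8(16, 64, 0, 0) describes a 16-bit push on a flat 64-bit stack, which is why
 * it is paired with iemNativeHlpStackFlatStoreU16.
 */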
7393
7394
7395/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7396DECL_INLINE_THROW(uint32_t)
7397iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7398 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7399{
7400 /*
7401 * Assert sanity.
7402 */
7403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7404 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7405#ifdef VBOX_STRICT
7406 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7407 {
7408 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7409 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7410 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7411 Assert( pfnFunction
7412 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7413 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7414 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7415 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7416 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7417 : UINT64_C(0xc000b000a0009000) ));
7418 }
7419 else
7420 Assert( pfnFunction
7421 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7422 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7423 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7424 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7425 : UINT64_C(0xc000b000a0009000) ));
7426#endif
7427
7428#ifdef VBOX_STRICT
7429 /*
7430 * Check that the fExec flags we've got make sense.
7431 */
7432 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7433#endif
7434
7435 /*
7436 * To keep things simple we have to commit any pending writes first as we
7437 * may end up making calls.
7438 */
7439 /** @todo we could postpone this till we make the call and reload the
7440 * registers after returning from the call. Not sure if that's sensible or
7441 * not, though. */
7442 off = iemNativeRegFlushPendingWrites(pReNative, off);
7443
7444 /*
7445 * First we calculate the new RSP and the effective stack pointer value.
7446 * For 64-bit mode and flat 32-bit these two are the same.
7447 * (Code structure is very similar to that of PUSH)
7448 */
7449 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7450 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7451 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7452 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7453 ? cbMem : sizeof(uint16_t);
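    /* Note: for the 32-bit segment pushes (IEM_MC_PUSH_U32_SREG) outside 16-bit
       code, Intel CPUs only write the low 16 bits and leave the rest of the stack
       slot untouched, which is why cbMemAccess can be smaller than cbMem here;
       in real/16-bit mode the upper half is instead filled from EFLAGS (see the
       fIsIntelSeg handling in the TLB-hit code further down). */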
7454 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7455 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7456 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7457 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7458 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7459 if (cBitsFlat != 0)
7460 {
7461 Assert(idxRegEffSp == idxRegRsp);
7462 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7463 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7464 if (cBitsFlat == 64)
7465 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7466 else
7467 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7468 }
7469 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7470 {
7471 Assert(idxRegEffSp != idxRegRsp);
7472 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7473 kIemNativeGstRegUse_ReadOnly);
7474#ifdef RT_ARCH_AMD64
7475 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7476#else
7477 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7478#endif
7479 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7480 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7481 offFixupJumpToUseOtherBitSp = off;
7482 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7483 {
7484 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7485 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7486 }
7487 else
7488 {
7489 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7490 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7491 }
7492 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7493 }
7494 /* SpUpdateEnd: */
7495 uint32_t const offLabelSpUpdateEnd = off;
7496
7497 /*
7498 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7499 * we're skipping lookup).
7500 */
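    /* The emitted code is laid out as: a jump to TlbLookup (or straight to TlbMiss
       when the lookup is skipped), the alternative Use16BitSp/Use32BitSp SP update,
       the TlbMiss helper call, the TlbLookup code doing the actual store, and
       finally the TlbDone label where the RSP update is committed and the
       temporary registers are freed. */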
7501 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7502 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7503 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7504 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7505 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7506 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7507 : UINT32_MAX;
7508 uint8_t const idxRegValue = !TlbState.fSkip
7509 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7510 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7511 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7512 : UINT8_MAX;
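    /* (ARG2 is preferred for idxRegValue above because the TlbMiss path below
        passes the value to the helper in IEMNATIVE_CALL_ARG2_GREG, saving a move.) */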
7513 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7514
7515
7516 if (!TlbState.fSkip)
7517 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7518 else
7519 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7520
7521 /*
7522 * Use16BitSp:
7523 */
7524 if (cBitsFlat == 0)
7525 {
7526#ifdef RT_ARCH_AMD64
7527 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7528#else
7529 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7530#endif
7531 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7532 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7533 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7534 else
7535 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7536 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7538 }
7539
7540 /*
7541 * TlbMiss:
7542 *
7543 * Call helper to do the pushing.
7544 */
7545 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7546
7547#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7548 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7549#else
7550 RT_NOREF(idxInstr);
7551#endif
7552
7553 /* Save variables in volatile registers. */
7554 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7555 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7556 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7557 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7558 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7559
7560 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7561 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7562 {
7563 /* Swap them using ARG0 as temp register: */
7564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7565 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7566 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7567 }
7568 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7569 {
7570 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7571 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7572 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7573
7574 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7575 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7576 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7577 }
7578 else
7579 {
7580 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7582
7583 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7584 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7585 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7586 }
7587
7588 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7589 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7590
7591 /* Done setting up parameters, make the call. */
7592 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7593
7594 /* Restore variables and guest shadow registers to volatile registers. */
7595 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7596 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7597
7598#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7599 if (!TlbState.fSkip)
7600 {
7601 /* end of TlbMiss - Jump to the done label. */
7602 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7603 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7604
7605 /*
7606 * TlbLookup:
7607 */
7608 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7609 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7610
7611 /*
7612 * Emit code to do the actual storing / fetching.
7613 */
7614 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7615# ifdef IEM_WITH_TLB_STATISTICS
7616 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7617 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7618# endif
7619 if (idxRegValue != UINT8_MAX)
7620 {
7621 switch (cbMemAccess)
7622 {
7623 case 2:
7624 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7625 break;
7626 case 4:
7627 if (!fIsIntelSeg)
7628 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7629 else
7630 {
7631                     /* Intel real mode segment push: the 10890XE writes the 2nd half of EFLAGS
7632                        into the upper word of a PUSH FS in real mode, so we have to try to emulate
7633                        that here.  We borrow the now unused idxReg1 from the TLB lookup code here. */
7634 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7635 kIemNativeGstReg_EFlags);
7636 if (idxRegEfl != UINT8_MAX)
7637 {
7638#ifdef RT_ARCH_AMD64
7639 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7640 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7641 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7642#else
7643 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7644 off, TlbState.idxReg1, idxRegEfl,
7645 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7646#endif
7647 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7648 }
7649 else
7650 {
7651 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7652 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7653 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7654 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7655 }
7656 /* ASSUMES the upper half of idxRegValue is ZERO. */
7657 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7658 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7659 }
7660 break;
7661 case 8:
7662 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7663 break;
7664 default:
7665 AssertFailed();
7666 }
7667 }
7668 else
7669 {
7670 switch (cbMemAccess)
7671 {
7672 case 2:
7673 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7674 idxRegMemResult, TlbState.idxReg1);
7675 break;
7676 case 4:
7677 Assert(!fIsSegReg);
7678 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7679 idxRegMemResult, TlbState.idxReg1);
7680 break;
7681 case 8:
7682 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7683 break;
7684 default:
7685 AssertFailed();
7686 }
7687 }
7688
7689 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7690 TlbState.freeRegsAndReleaseVars(pReNative);
7691
7692 /*
7693 * TlbDone:
7694 *
7695 * Commit the new RSP value.
7696 */
7697 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7698 }
7699#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7700
7701#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7702 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7703#endif
7704 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7705 if (idxRegEffSp != idxRegRsp)
7706 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7707
7708    /* The value variable is implicitly flushed. */
7709 if (idxRegValue != UINT8_MAX)
7710 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7711 iemNativeVarFreeLocal(pReNative, idxVarValue);
7712
7713 return off;
7714}
7715
7716
7717
7718/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
7719#define IEM_MC_POP_GREG_U16(a_iGReg) \
7720 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7721 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7722#define IEM_MC_POP_GREG_U32(a_iGReg) \
7723 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7724 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7725#define IEM_MC_POP_GREG_U64(a_iGReg) \
7726 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7727 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7728
7729#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7730 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7731 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7732#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7733 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7734 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7735
7736#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7737 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7738 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7739#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7740 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7741 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7742
7743
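/** Emits the 16-bit SP variant of the pop stack-pointer update: copies SP into
 *  idxRegEffSp and advances RSP[15:0] by cbMem, leaving the upper RSP bits
 *  untouched (idxRegTmp is only needed on arm64 for the masked add). */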
7744DECL_FORCE_INLINE_THROW(uint32_t)
7745iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7746 uint8_t idxRegTmp)
7747{
7748 /* Use16BitSp: */
7749#ifdef RT_ARCH_AMD64
7750 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7751 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7752 RT_NOREF(idxRegTmp);
7753#else
7754 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7755 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7756 /* add tmp, regrsp, #cbMem */
7757 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7758 /* and tmp, tmp, #0xffff */
7759 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7760 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7761    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7762 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7763#endif
7764 return off;
7765}
7766
7767
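/** Emits the 32-bit SP variant of the pop stack-pointer update: copies ESP into
 *  idxRegEffSp and advances the 32-bit RSP value by cbMem. */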
7768DECL_FORCE_INLINE(uint32_t)
7769iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7770{
7771 /* Use32BitSp: */
7772 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7773 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7774 return off;
7775}
7776
7777
7778/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7779DECL_INLINE_THROW(uint32_t)
7780iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7781 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7782{
7783 /*
7784 * Assert sanity.
7785 */
7786 Assert(idxGReg < 16);
7787#ifdef VBOX_STRICT
7788 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7789 {
7790 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7791 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7792 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7793 Assert( pfnFunction
7794 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7795 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7796 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7797 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7798 : UINT64_C(0xc000b000a0009000) ));
7799 }
7800 else
7801 Assert( pfnFunction
7802 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7803 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7804 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7805 : UINT64_C(0xc000b000a0009000) ));
7806#endif
7807
7808#ifdef VBOX_STRICT
7809 /*
7810 * Check that the fExec flags we've got make sense.
7811 */
7812 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7813#endif
7814
7815 /*
7816 * To keep things simple we have to commit any pending writes first as we
7817 * may end up making calls.
7818 */
7819 off = iemNativeRegFlushPendingWrites(pReNative, off);
7820
7821 /*
7822 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7823 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7824 * directly as the effective stack pointer.
7825 * (Code structure is very similar to that of PUSH)
7826 */
7827 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7828 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7829 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7830 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7831 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7832 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7833 * will be the resulting register value. */
7834 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7835
7836 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7837 if (cBitsFlat != 0)
7838 {
7839 Assert(idxRegEffSp == idxRegRsp);
7840 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7841 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7842 }
7843 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7844 {
7845 Assert(idxRegEffSp != idxRegRsp);
7846 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7847 kIemNativeGstRegUse_ReadOnly);
7848#ifdef RT_ARCH_AMD64
7849 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7850#else
7851 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7852#endif
7853 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7854 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7855 offFixupJumpToUseOtherBitSp = off;
7856 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7857 {
7858/** @todo can skip idxRegRsp updating when popping ESP. */
7859 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7860 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7861 }
7862 else
7863 {
7864 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7865 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7866 }
7867 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7868 }
7869 /* SpUpdateEnd: */
7870 uint32_t const offLabelSpUpdateEnd = off;
7871
7872 /*
7873 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7874 * we're skipping lookup).
7875 */
7876 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7877 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7878 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7879 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7880 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7881 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7882 : UINT32_MAX;
7883
7884 if (!TlbState.fSkip)
7885 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7886 else
7887 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7888
7889 /*
7890 * Use16BitSp:
7891 */
7892 if (cBitsFlat == 0)
7893 {
7894#ifdef RT_ARCH_AMD64
7895 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7896#else
7897 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7898#endif
7899 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7900 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7901 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7902 else
7903 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7904 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7906 }
7907
7908 /*
7909 * TlbMiss:
7910 *
7911     * Call helper to do the fetching.
7912 */
7913 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7914
7915#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7916 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7917#else
7918 RT_NOREF(idxInstr);
7919#endif
7920
7921 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7922 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7923 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7924 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7925
7926
7927 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7928 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7929 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7930
7931 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7932 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7933
7934 /* Done setting up parameters, make the call. */
7935 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7936
7937 /* Move the return register content to idxRegMemResult. */
7938 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7939 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7940
7941 /* Restore variables and guest shadow registers to volatile registers. */
7942 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7943 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7944
7945#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7946 if (!TlbState.fSkip)
7947 {
7948 /* end of TlbMiss - Jump to the done label. */
7949 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7950 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7951
7952 /*
7953 * TlbLookup:
7954 */
7955 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7956 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7957
7958 /*
7959         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
7960 */
7961 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7962# ifdef IEM_WITH_TLB_STATISTICS
7963 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7964 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7965# endif
7966 switch (cbMem)
7967 {
7968 case 2:
7969 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7970 break;
7971 case 4:
7972 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7973 break;
7974 case 8:
7975 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7976 break;
7977 default:
7978 AssertFailed();
7979 }
7980
7981 TlbState.freeRegsAndReleaseVars(pReNative);
7982
7983 /*
7984 * TlbDone:
7985 *
7986     * Set the new RSP value (FLAT accesses need to calculate it first) and
7987 * commit the popped register value.
7988 */
7989 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7990 }
7991#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7992
7993 if (idxGReg != X86_GREG_xSP)
7994 {
7995 /* Set the register. */
7996 if (cbMem >= sizeof(uint32_t))
7997 {
7998#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7999 AssertMsg( pReNative->idxCurCall == 0
8000 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8001 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8002#endif
8003 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8004#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8005 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8006#endif
8007#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8008 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8009 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8010#endif
8011 }
8012 else
8013 {
8014 Assert(cbMem == sizeof(uint16_t));
8015 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8016 kIemNativeGstRegUse_ForUpdate);
8017 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8018#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8019 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8020#endif
8021 iemNativeRegFreeTmp(pReNative, idxRegDst);
8022 }
8023
8024 /* Complete RSP calculation for FLAT mode. */
8025 if (idxRegEffSp == idxRegRsp)
8026 {
8027 if (cBitsFlat == 64)
8028 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8029 else
8030 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8031 }
8032 }
8033 else
8034 {
8035        /* We're popping RSP, ESP or SP.  Only the 16-bit SP case needs a bit of extra work, of course. */
8036 if (cbMem == sizeof(uint64_t))
8037 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8038 else if (cbMem == sizeof(uint32_t))
8039 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8040 else
8041 {
8042 if (idxRegEffSp == idxRegRsp)
8043 {
8044 if (cBitsFlat == 64)
8045 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8046 else
8047 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8048 }
8049 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8050 }
8051 }
8052
8053#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8054 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8055#endif
8056
8057 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8058 if (idxRegEffSp != idxRegRsp)
8059 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8060 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8061
8062 return off;
8063}
8064
8065
8066
8067/*********************************************************************************************************************************
8068* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8069*********************************************************************************************************************************/
8070
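/* Each mapping macro below passes the access mode (IEM_ACCESS_DATA_ATOMIC/RW/W/R),
   the natural alignment mask (sizeof(type) - 1) as fAlignMaskAndCtl, and the
   matching TLB-miss helper on to iemNativeEmitMemMapCommon. */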
8071#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8072 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8073 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8074 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8075
8076#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8077 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8078 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8079 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8080
8081#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8082 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8083 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8084 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8085
8086#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8087 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8088 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8089 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8090
8091
8092#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8093 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8094 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8095 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8096
8097#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8098 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8099 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8100 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8101
8102#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8103 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8104 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8105 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8106
8107#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8108 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8109 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8110 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8111
8112#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8113 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8114 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8115 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8116
8117
8118#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8119 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8120 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8121 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8122
8123#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8124 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8125 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8126 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8127
8128#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8129 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8130 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8131 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8132
8133#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8134 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8135 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8136 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8137
8138#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8139 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8140 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8141 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8142
8143
8144#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8145 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8146 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8147 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8148
8149#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8150 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8151 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8152 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8153#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8154 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8155 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8156 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8157
8158#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8159 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8160 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8161 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8162
8163#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8165 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8166 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8167
8168
8169#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8170 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8171 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8172 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8173
8174#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8175 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8176 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8177 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8178
8179
8180#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8181 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8182 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8183 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8184
8185#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8186 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8187 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8188 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8189
8190#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8191 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8192 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8193 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8194
8195#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8196 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8197 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8198 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8199
8200
8201
8202#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8203 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8204 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8205 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8206
8207#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8208 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8209 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8210 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8211
8212#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8213 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8214 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8215 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8216
8217#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8218 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8219 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8220 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8221
8222
8223#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8224 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8225 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8226 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8227
8228#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8229 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8230 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8231 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8232
8233#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8234 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8235 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8236 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8237
8238#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8239 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8240 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8241 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8242
8243#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8244 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8245 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8246 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8247
8248
8249#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8250 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8251 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8252 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8253
8254#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8255 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8256 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8257 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8258
8259#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8260 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8261 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8262 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8263
8264#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8265 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8266 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8267 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8268
8269#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8270 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8271 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8272 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8273
8274
8275#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8276 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8277 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8278 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8279
8280#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8281 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8282 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8283 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8284
8285#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8286 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8287 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8288 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8289
8290#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8291 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8292 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8293 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8294
8295#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8296 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8297 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8298 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8299
8300
8301#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8302 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8303 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8304 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8305
8306#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8307 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8308 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8309 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8310
8311
8312#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8313 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8314 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8315 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8316
8317#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8318 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8319 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8320 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8321
8322#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8323 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8324 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8325 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8326
8327#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8328 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8329 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8330 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8331
8332
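/** IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX worker (iSegReg is UINT8_MAX
 *  for the flat variants). */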
8333DECL_INLINE_THROW(uint32_t)
8334iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8335 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8336 uintptr_t pfnFunction, uint8_t idxInstr)
8337{
8338 /*
8339 * Assert sanity.
8340 */
8341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8342 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8343 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8344 && pVarMem->cbVar == sizeof(void *),
8345 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8346
8347 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8348 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8349 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8350 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8351 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8352
8353 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8354 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8355 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8356 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8357 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8358
8359 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8360
8361 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8362
8363#ifdef VBOX_STRICT
8364# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8365 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8366 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8367 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8368 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8369# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8370 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8371 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8372 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
8373
8374 if (iSegReg == UINT8_MAX)
8375 {
8376 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8377 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8378 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8379 switch (cbMem)
8380 {
8381 case 1:
8382 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8383 Assert(!fAlignMaskAndCtl);
8384 break;
8385 case 2:
8386 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8387 Assert(fAlignMaskAndCtl < 2);
8388 break;
8389 case 4:
8390 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8391 Assert(fAlignMaskAndCtl < 4);
8392 break;
8393 case 8:
8394 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8395 Assert(fAlignMaskAndCtl < 8);
8396 break;
8397 case 10:
8398 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8399 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8400 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8401 Assert(fAlignMaskAndCtl < 8);
8402 break;
8403 case 16:
8404 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8405 Assert(fAlignMaskAndCtl < 16);
8406 break;
8407# if 0
8408 case 32:
8409 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8410 Assert(fAlignMaskAndCtl < 32);
8411 break;
8412 case 64:
8413 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8414 Assert(fAlignMaskAndCtl < 64);
8415 break;
8416# endif
8417 default: AssertFailed(); break;
8418 }
8419 }
8420 else
8421 {
8422 Assert(iSegReg < 6);
8423 switch (cbMem)
8424 {
8425 case 1:
8426 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8427 Assert(!fAlignMaskAndCtl);
8428 break;
8429 case 2:
8430 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8431 Assert(fAlignMaskAndCtl < 2);
8432 break;
8433 case 4:
8434 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8435 Assert(fAlignMaskAndCtl < 4);
8436 break;
8437 case 8:
8438 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8439 Assert(fAlignMaskAndCtl < 8);
8440 break;
8441 case 10:
8442 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8443 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8444 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8445 Assert(fAlignMaskAndCtl < 8);
8446 break;
8447 case 16:
8448 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8449 Assert(fAlignMaskAndCtl < 16);
8450 break;
8451# if 0
8452 case 32:
8453 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8454 Assert(fAlignMaskAndCtl < 32);
8455 break;
8456 case 64:
8457 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8458 Assert(fAlignMaskAndCtl < 64);
8459 break;
8460# endif
8461 default: AssertFailed(); break;
8462 }
8463 }
8464# undef IEM_MAP_HLP_FN
8465# undef IEM_MAP_HLP_FN_NO_AT
8466#endif
8467
8468#ifdef VBOX_STRICT
8469 /*
8470 * Check that the fExec flags we've got make sense.
8471 */
8472 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8473#endif
8474
8475 /*
8476 * To keep things simple we have to commit any pending writes first as we
8477 * may end up making calls.
8478 */
8479 off = iemNativeRegFlushPendingWrites(pReNative, off);
8480
8481#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8482 /*
8483 * Move/spill/flush stuff out of call-volatile registers.
8484 * This is the easy way out. We could contain this to the tlb-miss branch
8485 * by saving and restoring active stuff here.
8486 */
8487 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8488 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8489#endif
8490
8491 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8492 while the tlb-miss codepath will temporarily put it on the stack.
8493       Set the type to stack here so we don't need to do it twice below. */
8494 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8495 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8496 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8497 * lookup is done. */
8498
8499 /*
8500 * Define labels and allocate the result register (trying for the return
8501 * register if we can).
8502 */
8503 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8504 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8505 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8506 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8507 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8508 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8509 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8510 : UINT32_MAX;
8511//off=iemNativeEmitBrk(pReNative, off, 0);
8512 /*
8513 * Jump to the TLB lookup code.
8514 */
8515 if (!TlbState.fSkip)
8516 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8517
8518 /*
8519 * TlbMiss:
8520 *
8521     * Call helper to do the mapping.
8522 * We flush all guest register shadow copies here.
8523 */
8524 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8525
8526#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8527 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8528#else
8529 RT_NOREF(idxInstr);
8530#endif
8531
8532#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8533 /* Save variables in volatile registers. */
8534 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8535 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8536#endif
8537
8538 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8539    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
8540#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8541 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8542#else
8543 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8544#endif
8545
8546 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8547 if (iSegReg != UINT8_MAX)
8548 {
8549 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8550 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8551 }
8552
8553 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8554 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8555 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8556
8557 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8558 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8559
8560 /* Done setting up parameters, make the call. */
8561 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8562
8563 /*
8564 * Put the output in the right registers.
8565 */
8566 Assert(idxRegMemResult == pVarMem->idxReg);
8567 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8568 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8569
8570#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8571 /* Restore variables and guest shadow registers to volatile registers. */
8572 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8573 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8574#endif
8575
8576 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8577 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8578
8579#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8580 if (!TlbState.fSkip)
8581 {
8582        /* end of TlbMiss - Jump to the done label. */
8583 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8584 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8585
8586 /*
8587 * TlbLookup:
8588 */
8589 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8590 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8591# ifdef IEM_WITH_TLB_STATISTICS
8592 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8593 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8594# endif
8595
8596 /* [idxVarUnmapInfo] = 0; */
8597 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8598
8599 /*
8600 * TlbDone:
8601 */
8602 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8603
8604 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8605
8606# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8607 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8608 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8609# endif
8610 }
8611#else
8612 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8613#endif
8614
8615 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8616 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8617
8618 return off;
8619}
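/*
 * For orientation, the emitter above lays out the native code for the mapping roughly as:
 *   1. an unconditional jump to the TlbLookup code (when the lookup isn't skipped),
 *   2. the TlbMiss block: save the volatile registers, load pVCpu, GCPtrMem, iSegReg and the
 *      address of the bUnmapInfo stack slot into the call argument registers, call the mapping
 *      helper (pfnFunction), move the returned pointer into idxRegMemResult, restore the
 *      volatile registers, reload bUnmapInfo from its stack slot and jump to TlbDone,
 *   3. the TlbLookup block: the inline TLB probe (which branches to TlbMiss when it fails),
 *      followed by zeroing bUnmapInfo since a TLB hit needs no unmap helper,
 *   4. the TlbDone label where both paths rejoin.
 */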
8620
8621
8622#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8623 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8624 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8625
8626#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8627 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8628 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8629
8630#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8631 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8632 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8633
8634#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8635 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8636 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8637
8638DECL_INLINE_THROW(uint32_t)
8639iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8640 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8641{
8642 /*
8643 * Assert sanity.
8644 */
8645 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8646#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8647 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8648#endif
8649 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8650 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8651 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8652#ifdef VBOX_STRICT
8653 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8654 {
8655 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8656 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8657 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8658 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8659 case IEM_ACCESS_TYPE_WRITE:
8660 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8661 case IEM_ACCESS_TYPE_READ:
8662 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8663 default: AssertFailed();
8664 }
8665#else
8666 RT_NOREF(fAccess);
8667#endif
8668
8669 /*
8670 * To keep things simple we have to commit any pending writes first as we
8671 * may end up making calls (there shouldn't be any at this point, so this
8672 * is just for consistency).
8673 */
8674 /** @todo we could postpone this till we make the call and reload the
8675 * registers after returning from the call. Not sure if that's sensible or
8676 * not, though. */
8677 off = iemNativeRegFlushPendingWrites(pReNative, off);
8678
8679 /*
8680 * Move/spill/flush stuff out of call-volatile registers.
8681 *
8682 * We exclude any register holding the bUnmapInfo variable, as we'll be
8683 * checking it after returning from the call and will free it afterwards.
8684 */
8685 /** @todo save+restore active registers and maybe guest shadows in miss
8686 * scenario. */
8687 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8688 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8689
8690 /*
8691 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8692 * to call the unmap helper function.
8693 *
8694     * The likelihood of it being zero is higher than for the TLB hit when doing
8695     * the mapping, as a TLB miss for a well aligned and unproblematic memory
8696     * access should also end up with a mapping that won't need special unmapping.
8697 */
8698 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8699 * should speed up things for the pure interpreter as well when TLBs
8700 * are enabled. */
8701#ifdef RT_ARCH_AMD64
8702 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8703 {
8704 /* test byte [rbp - xxx], 0ffh */
8705 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8706 pbCodeBuf[off++] = 0xf6;
8707 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8708 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8709 pbCodeBuf[off++] = 0xff;
8710 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8711 }
8712 else
8713#endif
8714 {
8715 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8716 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8717 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8718 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8719 }
8720 uint32_t const offJmpFixup = off;
8721    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
8722
8723 /*
8724 * Call the unmap helper function.
8725 */
8726#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8727 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8728#else
8729 RT_NOREF(idxInstr);
8730#endif
8731
8732 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8733 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8734 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8735
8736 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8737 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8738
8739 /* Done setting up parameters, make the call. */
8740 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8741
8742    /* The bUnmapInfo variable is implicitly freed by these MCs. */
8743 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8744
8745 /*
8746 * Done, just fixup the jump for the non-call case.
8747 */
8748 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8749
8750 return off;
8751}
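/*
 * Illustrative only (not part of the build): the commit-and-unmap macros above form the tail
 * of a map/modify/commit sequence in the MC blocks. A read-modify-write body looks roughly
 * like the sketch below; the map and AIMPL macro signatures are assumptions here, only the
 * unmap macro itself is defined in this file:
 *
 *     IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *     IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_add_u32, pu32Dst, u32Src, pEFlags);
 *     IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 *
 * When the mapping was a TLB hit, the mapping code stored zero in bUnmapInfo, so the
 * test+jz sequence emitted above skips the unmap helper call entirely.
 */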
8752
8753
8754
8755/*********************************************************************************************************************************
8756* State and Exceptions *
8757*********************************************************************************************************************************/
8758
8759#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8760#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8761
8762#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8763#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8764#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8765
8766#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8767#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8768#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8769
8770
8771DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8772{
8773 /** @todo this needs a lot more work later. */
8774 RT_NOREF(pReNative, fForChange);
8775 return off;
8776}
8777
8778
8779
8780/*********************************************************************************************************************************
8781* Emitters for FPU related operations. *
8782*********************************************************************************************************************************/
8783
8784#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8785 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8786
8787/** Emits code for IEM_MC_FETCH_FCW. */
8788DECL_INLINE_THROW(uint32_t)
8789iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8790{
8791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8793
8794 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8795
8796 /* Allocate a temporary FCW register. */
8797 /** @todo eliminate extra register */
8798 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8799 kIemNativeGstRegUse_ReadOnly);
8800
8801 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8802
8803 /* Free but don't flush the FCW register. */
8804 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8805 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8806
8807 return off;
8808}
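/*
 * Illustrative only: IEM_MC_FETCH_FCW is typically consumed by a control-word store style
 * emulation (fnstcw and friends), roughly as sketched below; the store macro and the exact
 * argument names are assumptions here:
 *
 *     IEM_MC_LOCAL(uint16_t, u16Fcw);
 *     IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
 *     IEM_MC_FETCH_FCW(u16Fcw);
 *     IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Fcw);
 */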
8809
8810
8811#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8812 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8813
8814/** Emits code for IEM_MC_FETCH_FSW. */
8815DECL_INLINE_THROW(uint32_t)
8816iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8817{
8818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8820
8821 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8822 /* Allocate a temporary FSW register. */
8823 /** @todo eliminate extra register */
8824 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8825 kIemNativeGstRegUse_ReadOnly);
8826
8827 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8828
8829 /* Free but don't flush the FSW register. */
8830 iemNativeRegFreeTmp(pReNative, idxFswReg);
8831 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8832
8833 return off;
8834}
8835
8836
8837
8838#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8839
8840
8841/*********************************************************************************************************************************
8842* Emitters for SSE/AVX specific operations. *
8843*********************************************************************************************************************************/
8844
8845#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8846 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8847
8848/** Emits code for IEM_MC_COPY_XREG_U128. */
8849DECL_INLINE_THROW(uint32_t)
8850iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8851{
8852 /* This is a nop if the source and destination register are the same. */
8853 if (iXRegDst != iXRegSrc)
8854 {
8855 /* Allocate destination and source register. */
8856 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8857 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8858 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8859 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8860
8861 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8862
8863 /* Free but don't flush the source and destination register. */
8864 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8865 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8866 }
8867
8868 return off;
8869}
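/*
 * Illustrative only: a register-to-register movaps/movdqa style emulation reaches the copy
 * emitter above via an MC block along these lines (the decoder helpers are assumptions here):
 *
 *     IEM_MC_PREPARE_SSE_USAGE();
 *     IEM_MC_COPY_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm));
 *     IEM_MC_ADVANCE_RIP_AND_FINISH();
 */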
8870
8871
8872#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8873 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8874
8875/** Emits code for IEM_MC_FETCH_XREG_U128. */
8876DECL_INLINE_THROW(uint32_t)
8877iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8878{
8879 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8880 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8881
8882 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8883 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8884
8885 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8886
8887 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8888
8889 /* Free but don't flush the source register. */
8890 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8891 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8892
8893 return off;
8894}
8895
8896
8897#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8898 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8899
8900#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8901 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8902
8903/** Emits code for IEM_MC_FETCH_XREG_U64. */
8904DECL_INLINE_THROW(uint32_t)
8905iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8906{
8907 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8908 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8909
8910 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8911 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8912
8913 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8914 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8915
8916 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8917
8918 /* Free but don't flush the source register. */
8919 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8920 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8921
8922 return off;
8923}
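/*
 * Note: a_iQWord selects the 64-bit element within the XMM register, so an a_iQWord of 0
 * fetches the low qword and 1 the high qword; only the low 128 bits of the guest SIMD
 * register are loaded for this.
 */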
8924
8925
8926#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8927 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8928
8929#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8930 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8931
8932/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8933DECL_INLINE_THROW(uint32_t)
8934iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8935{
8936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8937 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8938
8939 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8940 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8941
8942 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8943 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8944
8945 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8946
8947 /* Free but don't flush the source register. */
8948 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8949 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8950
8951 return off;
8952}
8953
8954
8955#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8956 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8957
8958/** Emits code for IEM_MC_FETCH_XREG_U16. */
8959DECL_INLINE_THROW(uint32_t)
8960iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8961{
8962 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8963 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8964
8965 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8966 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8967
8968 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8969 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8970
8971 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
8972
8973 /* Free but don't flush the source register. */
8974 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8975 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8976
8977 return off;
8978}
8979
8980
8981#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
8982 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
8983
8984/** Emits code for IEM_MC_FETCH_XREG_U8. */
8985DECL_INLINE_THROW(uint32_t)
8986iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
8987{
8988 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8989 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
8990
8991 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8992 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8993
8994 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8995 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8996
8997 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
8998
8999 /* Free but don't flush the source register. */
9000 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9001 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9002
9003 return off;
9004}
9005
9006
9007#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9008 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9009
9010AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9011#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9012 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9013
9014
9015/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9016DECL_INLINE_THROW(uint32_t)
9017iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9018{
9019 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9020 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9021
9022 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9023 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
9024 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9025
9026 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9027
9028 /* Free but don't flush the source register. */
9029 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9030 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9031
9032 return off;
9033}
9034
9035
9036#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9037 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9038
9039#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9040 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9041
9042#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9043 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9044
9045#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9046 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9047
9048#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9049 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9050
9051#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9052 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9053
9054/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9055DECL_INLINE_THROW(uint32_t)
9056iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9057 uint8_t cbLocal, uint8_t iElem)
9058{
9059 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9060 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9061
9062#ifdef VBOX_STRICT
9063 switch (cbLocal)
9064 {
9065 case sizeof(uint64_t): Assert(iElem < 2); break;
9066 case sizeof(uint32_t): Assert(iElem < 4); break;
9067 case sizeof(uint16_t): Assert(iElem < 8); break;
9068 case sizeof(uint8_t): Assert(iElem < 16); break;
9069 default: AssertFailed();
9070 }
9071#endif
9072
9073 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9074 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9075 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9076
9077 switch (cbLocal)
9078 {
9079 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9080 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9081 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9082 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9083 default: AssertFailed();
9084 }
9085
9086 /* Free but don't flush the source register. */
9087 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9088 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9089
9090 return off;
9091}
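/*
 * Illustrative only: these element stores update the register in place (ForUpdate), so e.g.
 *
 *     IEM_MC_STORE_XREG_U32(IEM_GET_MODRM_REG(pVCpu, bRm), 0, u32Value);
 *
 * rewrites dword 0 of the XMM register and leaves dwords 1 thru 3 untouched
 * (IEM_GET_MODRM_REG being the usual decoder helper, assumed here).
 */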
9092
9093
9094#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9095 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9096
9097/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9098DECL_INLINE_THROW(uint32_t)
9099iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9100{
9101 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9102 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9103
9104 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9105 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9106 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9107
9108    /* Zero the vector register first, then store the 64-bit value in the low 64 bits. */
9109 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9110 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9111
9112 /* Free but don't flush the source register. */
9113 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9114 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9115
9116 return off;
9117}
9118
9119
9120#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9121 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9122
9123/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9124DECL_INLINE_THROW(uint32_t)
9125iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9126{
9127 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9128 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9129
9130 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9131 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9132 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9133
9134 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9135 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9136 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9137
9138 /* Free but don't flush the source register. */
9139 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9140 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9141
9142 return off;
9143}
9144
9145
9146#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9147 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9148
9149/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9150DECL_INLINE_THROW(uint32_t)
9151iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9152 uint8_t idxSrcVar, uint8_t iDwSrc)
9153{
9154 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9155 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9156
9157 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9158 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9159 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9160
9161 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9162 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9163
9164 /* Free but don't flush the destination register. */
9165 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9166 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9167
9168 return off;
9169}
9170
9171
9172#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9173 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9174
9175/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9176DECL_INLINE_THROW(uint32_t)
9177iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9178{
9179 /*
9180 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
9181 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
9182     * allocated host register for iYRegDst containing garbage. This will be caught by the guest register value checking in debug builds.
9183 */
9184 if (iYRegDst != iYRegSrc)
9185 {
9186 /* Allocate destination and source register. */
9187 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9188 kIemNativeGstSimdRegLdStSz_256,
9189 kIemNativeGstRegUse_ForFullWrite);
9190 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9191 kIemNativeGstSimdRegLdStSz_Low128,
9192 kIemNativeGstRegUse_ReadOnly);
9193
9194 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9195 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9196
9197 /* Free but don't flush the source and destination register. */
9198 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9199 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9200 }
9201 else
9202 {
9203 /* This effectively only clears the upper 128-bits of the register. */
9204 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9205 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9206
9207 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9208
9209 /* Free but don't flush the destination register. */
9210 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9211 }
9212
9213 return off;
9214}
9215
9216
9217#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9218 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9219
9220/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9221DECL_INLINE_THROW(uint32_t)
9222iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9223{
9224 /*
9225 * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
9226 * it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
9227     * allocated host register for iYRegDst containing garbage. This will be caught by the guest register value checking in debug builds.
9228     * With iYRegSrc == iYRegDst the operation would effectively only clear the upper 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
9229 */
9230 if (iYRegDst != iYRegSrc)
9231 {
9232 /* Allocate destination and source register. */
9233 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9234 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9235 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9236 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9237
9238 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9239
9240 /* Free but don't flush the source and destination register. */
9241 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9242 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9243 }
9244
9245 return off;
9246}
9247
9248
9249#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9250 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9251
9252/** Emits code for IEM_MC_FETCH_YREG_U128. */
9253DECL_INLINE_THROW(uint32_t)
9254iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9255{
9256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9257 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9258
9259 Assert(iDQWord <= 1);
9260 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9261 iDQWord == 1
9262 ? kIemNativeGstSimdRegLdStSz_High128
9263 : kIemNativeGstSimdRegLdStSz_Low128,
9264 kIemNativeGstRegUse_ReadOnly);
9265
9266 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9267 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9268
9269 if (iDQWord == 1)
9270 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9271 else
9272 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9273
9274 /* Free but don't flush the source register. */
9275 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9276 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9277
9278 return off;
9279}
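/*
 * Note: a_iDQWord addresses the 128-bit halves of the YMM register; 0 fetches the low half
 * (the XMM part) and 1 the high half, and only the addressed half is loaded from guest state.
 */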
9280
9281
9282#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9283 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9284
9285/** Emits code for IEM_MC_FETCH_YREG_U64. */
9286DECL_INLINE_THROW(uint32_t)
9287iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9288{
9289 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9290 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9291
9292 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9293 iQWord >= 2
9294 ? kIemNativeGstSimdRegLdStSz_High128
9295 : kIemNativeGstSimdRegLdStSz_Low128,
9296 kIemNativeGstRegUse_ReadOnly);
9297
9298 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9299 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9300
9301 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9302
9303 /* Free but don't flush the source register. */
9304 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9305 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9306
9307 return off;
9308}
9309
9310
9311#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9312 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9313
9314/** Emits code for IEM_MC_FETCH_YREG_U32. */
9315DECL_INLINE_THROW(uint32_t)
9316iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9317{
9318 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9319 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9320
9321 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9322 iDWord >= 4
9323 ? kIemNativeGstSimdRegLdStSz_High128
9324 : kIemNativeGstSimdRegLdStSz_Low128,
9325 kIemNativeGstRegUse_ReadOnly);
9326
9327 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9328 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9329
9330 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9331
9332 /* Free but don't flush the source register. */
9333 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9334 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9335
9336 return off;
9337}
9338
9339
9340#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9341 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9342
9343/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9344DECL_INLINE_THROW(uint32_t)
9345iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9346{
9347 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9348 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9349
9350 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9351
9352 /* Free but don't flush the register. */
9353 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9354
9355 return off;
9356}
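/*
 * Illustrative only: this is the VLMAX zero-extension step that 128-bit AVX operations apply
 * to their destination, typically emitted right after the low half has been written, e.g.:
 *
 *     IEM_MC_STORE_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), uDst);
 *     IEM_MC_CLEAR_YREG_128_UP(IEM_GET_MODRM_REG(pVCpu, bRm));
 *
 * (the decoder helper is an assumption here).
 */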
9357
9358
9359#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9360 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9361
9362/** Emits code for IEM_MC_STORE_YREG_U128. */
9363DECL_INLINE_THROW(uint32_t)
9364iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9365{
9366 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9367 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9368
9369 Assert(iDQword <= 1);
9370 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9371 iDQword == 0
9372 ? kIemNativeGstSimdRegLdStSz_Low128
9373 : kIemNativeGstSimdRegLdStSz_High128,
9374 kIemNativeGstRegUse_ForFullWrite);
9375
9376 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9377
9378 if (iDQword == 0)
9379 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9380 else
9381 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9382
9383 /* Free but don't flush the source register. */
9384 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9385 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9386
9387 return off;
9388}
9389
9390
9391#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9392 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9393
9394/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9395DECL_INLINE_THROW(uint32_t)
9396iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9397{
9398 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9399 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9400
9401 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9402 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9403
9404 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9405
9406 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9407 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9408
9409 /* Free but don't flush the source register. */
9410 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9411 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9412
9413 return off;
9414}
9415
9416
9417#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9418 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9419
9420/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9421DECL_INLINE_THROW(uint32_t)
9422iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9423{
9424 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9425 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9426
9427 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9428 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9429
9430 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9431
9432 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9433 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9434
9435 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9436 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9437
9438 return off;
9439}
9440
9441
9442#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9443 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9444
9445/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9446DECL_INLINE_THROW(uint32_t)
9447iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9448{
9449 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9450 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9451
9452 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9453 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9454
9455 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9456
9457 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9458 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9459
9460 /* Free but don't flush the source register. */
9461 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9462 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9463
9464 return off;
9465}
9466
9467
9468#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9469 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9470
9471/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9472DECL_INLINE_THROW(uint32_t)
9473iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9474{
9475 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9476 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9477
9478 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9479 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9480
9481 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9482
9483 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9484 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9485
9486 /* Free but don't flush the source register. */
9487 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9488 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9489
9490 return off;
9491}
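/*
 * Illustrative only: the broadcast emitters back vpbroadcast* style emulations; a 128-bit
 * vpbroadcastd from a 32-bit local would use something like (decoder helper assumed):
 *
 *     IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(IEM_GET_MODRM_REG(pVCpu, bRm), u32Src);
 *
 * which replicates u32Src into all four dwords of the XMM register and zeroes the upper
 * 128 bits of the corresponding YMM register.
 */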
9492
9493
9494#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9495 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9496
9497/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9498DECL_INLINE_THROW(uint32_t)
9499iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9500{
9501 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9502 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9503
9504 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9505 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9506
9507 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9508
9509 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9510 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9511
9512 /* Free but don't flush the source register. */
9513 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9514 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9515
9516 return off;
9517}
9518
9519
9520#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9521 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9522
9523/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9524DECL_INLINE_THROW(uint32_t)
9525iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9526{
9527 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9528 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9529
9530 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9531 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9532
9533 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9534
9535 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9536
9537 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9538 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9539
9540 return off;
9541}
9542
9543
9544#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9545 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9546
9547/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9548DECL_INLINE_THROW(uint32_t)
9549iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9550{
9551 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9552 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9553
9554 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9555 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9556
9557 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9558
9559 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9560
9561 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9562 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9563
9564 return off;
9565}
9566
9567
9568#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9569 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9570
9571/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9572DECL_INLINE_THROW(uint32_t)
9573iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9574{
9575 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9576 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9577
9578 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9579 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9580
9581 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9582
9583 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9584
9585 /* Free but don't flush the source register. */
9586 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9587 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9588
9589 return off;
9590}
9591
9592
9593#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9594 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9595
9596/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9597DECL_INLINE_THROW(uint32_t)
9598iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9599{
9600 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9601 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9602
9603 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9604 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9605
9606 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9607
9608 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9609
9610 /* Free but don't flush the source register. */
9611 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9612 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9613
9614 return off;
9615}
9616
9617
9618#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9619 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9620
9621/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9622DECL_INLINE_THROW(uint32_t)
9623iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9624{
9625 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9626 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9627
9628 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9629 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9630
9631 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9632
9633 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9634
9635 /* Free but don't flush the source register. */
9636 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9637 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9638
9639 return off;
9640}
9641
9642
9643#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9644 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9645
9646/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9647DECL_INLINE_THROW(uint32_t)
9648iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9649{
9650 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9651 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9652
9653 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9654 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9655
9656 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9657
9658 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9659 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9660
9661 /* Free but don't flush the source register. */
9662 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9663 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9664
9665 return off;
9666}
9667
9668
9669#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9670 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9671
9672/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9673DECL_INLINE_THROW(uint32_t)
9674iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9675{
9676 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9677 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9678
9679 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9680 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9681
9682 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9683
9684 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9685 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9686
9687 /* Free but don't flush the source register. */
9688 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9689 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9690
9691 return off;
9692}
9693
9694
9695#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9696 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9697
9698/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9699DECL_INLINE_THROW(uint32_t)
9700iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9701{
9702 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9703 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9704
9705 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9706 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9707 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9708 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9709 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9710
9711 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9712 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9713 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9714
9715 /* Free but don't flush the source and destination registers. */
9716 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9717 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9718 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9719
9720 return off;
9721}
9722
9723
9724#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9725 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9726
9727/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9728DECL_INLINE_THROW(uint32_t)
9729iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9730{
9731 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9732 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9733
9734 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9735 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9736 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9737 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9738 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9739
9740 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9741 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9742 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9743
9744 /* Free but don't flush the source and destination registers. */
9745 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9746 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9747 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9748
9749 return off;
9750}
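/*
 * Note on the two merge emitters above: both start by copying the low 128 bits of
 * a_iYRegSrcHx into the destination and both zero the destination's upper 128 bits.
 * The U64LOCAL_U64HI variant then overwrites qword 0 with the local value (keeping
 * qword 1 from the source), while the U64LO_U64LOCAL variant overwrites qword 1
 * (keeping qword 0 from the source).
 */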
9751
9752
9753#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9754 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9755
9756
9757/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9758DECL_INLINE_THROW(uint32_t)
9759iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9760{
9761 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9762 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9763
9764 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
9765 if (bImm8Mask & RT_BIT(0))
9766 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9767 if (bImm8Mask & RT_BIT(1))
9768 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9769 if (bImm8Mask & RT_BIT(2))
9770 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9771 if (bImm8Mask & RT_BIT(3))
9772 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9773
9774 /* Free but don't flush the destination register. */
9775 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9776
9777 return off;
9778}
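/*
 * Illustrative only: a_bMask selects which dwords to zero, one bit per dword, so e.g.
 * IEM_MC_CLEAR_XREG_U32_MASK(iXReg, RT_BIT(3)) clears only dword 3. This matches the
 * shape of mask produced by e.g. the insertps zmask field (that association is an
 * assumption here).
 */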
9779
9780
9781#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9782 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9783
9784#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
9785 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
9786
9787/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
9788DECL_INLINE_THROW(uint32_t)
9789iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9790{
9791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9793
9794 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9795 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9796 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9797
9798 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9799
9800 /* Free but don't flush the source register. */
9801 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9802 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9803
9804 return off;
9805}


#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
    off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)

#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
    off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)

/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);

    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);

    /* Free but don't flush the destination register and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}
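
/*
 * Illustrative sketch (not part of the build): the destination is allocated
 * for a full 256-bit write, so copying the source variable already gives the
 * "zero extended to VLMAX" semantics (there is no wider register state yet):
 *
 *     uYRegDst.au64[0] = u256Src.au64[0];
 *     uYRegDst.au64[1] = u256Src.au64[1];
 *     uYRegDst.au64[2] = u256Src.au64[2];
 *     uYRegDst.au64[3] = u256Src.au64[3];
 */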


#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
    off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)


/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
                                      uint8_t idxSrcVar, uint8_t iDwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iDwDst < 4
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);

    /* Free but don't flush the destination and temporary registers, and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}
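
/*
 * Illustrative sketch (not part of the build): only the addressed dword of the
 * destination changes; everything else in the YMM register is left untouched:
 *
 *     uint32_t const uTmp = u256Value.au32[iDwSrc];   // bounced through a GPR, as emitted above
 *     uYRegDst.au32[iDwDst] = uTmp;
 */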


#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
    off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)


/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
                                      uint8_t idxSrcVar, uint8_t iQwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);

    /* Free but don't flush the destination and temporary registers, and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}
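
/*
 * Illustrative sketch (not part of the build): the qword analogue of the dword
 * case above:
 *
 *     uYRegDst.au64[iQwDst] = u256Value.au64[iQwSrc];   // other qwords untouched
 */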


#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)


/** Emits code for IEM_MC_STORE_YREG_U64. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);

    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);

    /* Free but don't flush the destination register and release the source variable. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}
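
/*
 * Illustrative sketch (not part of the build): the 64-bit variable already
 * lives in a GPR, so a single element store does the job:
 *
 *     uYRegDst.au64[iQword] = u64Value;   // other qwords untouched, no zero extension
 */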


#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)

/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
{
    RT_NOREF(pReNative, iYReg);
    /** @todo Needs to be implemented when support for AVX-512 is added. */
    return off;
}



/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_SSE_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
{
    /* Grab the MXCSR register; it must not be call volatile, or we would end up freeing it when setting up the call below. */
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));

    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);

    /*
     * Load the MXCSR register into the first argument and mask out the current exception flags.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);

    /*
     * The updated MXCSR is in the return register.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);

#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
#endif
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);

    return off;
}
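
/*
 * Illustrative sketch (not part of the build): the calling convention set up
 * above, seen from the point of view of the generated code (pfnAImpl and the
 * argument list are whatever the IEM_MC_CALL_*_AIMPL_* invocation supplied):
 *
 *     uint32_t uMxCsrIn  = uGuestMxCsr & ~X86_MXCSR_XCPT_FLAGS;   // arg0: current MXCSR w/o stale exception flags
 *     uint32_t uMxCsrOut = pfnAImpl(uMxCsrIn, ...);               // assembly helper returns the updated MXCSR
 *     uGuestMxCsr = uMxCsrOut;                                    // written back to the guest context
 */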


#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}
#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */


/*********************************************************************************************************************************
*   Include instruction emitters.                                                                                                *
*********************************************************************************************************************************/
#include "target-x86/IEMAllN8veEmit-x86.h"
