VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105350

Last change on this file since 105350 was 105315, checked in by vboxsync, 9 months ago

VMM/IEM: Implement vcvttss2si, vcvttsd2si, vcvtss2si and vcvtsd2si instruction emulations, bugref:9898

1/* $Id: IEMAllN8veRecompFuncs.h 105315 2024-07-12 17:38:53Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value, they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
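#if 0
/* Illustrative usage sketch only: roughly how an MC block in the instruction
   decoder combines the IEM_MC_NATIVE_XXX macros above.  The emitter name
   iemNativeEmit_sample_r32_r32 and the idxRegDst/idxRegSrc arguments are made
   up for the example; RT_ARCH_VAL_XXX are the IPRT host architecture masks. */
IEM_MC_BEGIN_EX(0, 0, 0);
IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
    IEM_MC_NATIVE_EMIT_2(iemNativeEmit_sample_r32_r32, idxRegDst, idxRegSrc);
IEM_MC_NATIVE_ELSE()
    /* fallback using regular IEM_MC_XXX statements for other hosts */
IEM_MC_NATIVE_ENDIF();
IEM_MC_END();
#endif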
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                             *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
250 a_cbInstr) /** @todo not used ... */
251
252
253#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
254 pReNative->fMc = 0; \
255 pReNative->fCImpl = (a_fFlags); \
256 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
257
258DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
259 uint8_t idxInstr, uint64_t a_fGstShwFlush,
260 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
261{
262 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
263}
264
265
266#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
267 pReNative->fMc = 0; \
268 pReNative->fCImpl = (a_fFlags); \
269 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
270 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
271
272DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
273 uint8_t idxInstr, uint64_t a_fGstShwFlush,
274 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
275{
276 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
277}
278
279
280#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
281 pReNative->fMc = 0; \
282 pReNative->fCImpl = (a_fFlags); \
283 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
284 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
285
286DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
287 uint8_t idxInstr, uint64_t a_fGstShwFlush,
288 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
289 uint64_t uArg2)
290{
291 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
292}
293
294
295
296/*********************************************************************************************************************************
297* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
298*********************************************************************************************************************************/
299
300/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
301 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
302DECL_INLINE_THROW(uint32_t)
303iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
304{
305 /*
306 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
307 * return with a special status code and make the execution loop deal with
308 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
309 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
310 * could continue w/o interruption, it probably will drop into the
311 * debugger, so it is not worth the effort of trying to service it here;
312 * we just lump it in with the handling of the others.
313 *
314 * To simplify the code and the register state management even more (wrt
315 * the immediate in the AND operation), we always update the flags and
316 * skip the extra check and its associated conditional jump.
317 */
318 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
319 <= UINT32_MAX);
320#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
321 AssertMsg( pReNative->idxCurCall == 0
322 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
323 IEMLIVENESSBIT_IDX_EFL_OTHER)),
324 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
325 IEMLIVENESSBIT_IDX_EFL_OTHER)));
326#endif
327
328 /*
329 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
330 * any pending register writes must be flushed.
331 */
332 off = iemNativeRegFlushPendingWrites(pReNative, off);
333
334 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
335 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
336 true /*fSkipLivenessAssert*/);
337 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
338 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
339 kIemNativeLabelType_ReturnWithFlags);
340 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
341 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
342
343 /* Free but don't flush the EFLAGS register. */
344 iemNativeRegFreeTmp(pReNative, idxEflReg);
345
346 return off;
347}
348
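#if 0
/* Illustrative plain-C sketch of the guest-visible logic the above emitter
   produces; sketchFinishInstructionFlagsCheck is a made-up name and *pfExitTb
   stands in for the emitted branch to the ReturnWithFlags TB exit. */
static uint32_t sketchFinishInstructionFlagsCheck(uint32_t fEFlags, bool *pfExitTb)
{
    *pfExitTb = RT_BOOL(fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK));
    if (!*pfExitTb)
        fEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW); /* cleared unconditionally on the normal path */
    return fEFlags;
}
#endif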
349
350/** Helper for iemNativeEmitFinishInstructionWithStatus. */
351DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
352{
353 unsigned const offOpcodes = pCallEntry->offOpcode;
354 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
355 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
356 {
357 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
358 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
359 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
360 }
361 AssertFailedReturn(NIL_RTGCPHYS);
362}
363
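#if 0
/* Simplified, self-contained sketch of the range walk above; SKETCHRANGE and
   sketchFindOpcodeRange are made-up names.  Note how the unsigned subtraction
   turns the lower+upper bound test into a single compare. */
typedef struct SKETCHRANGE { unsigned offOpcodes, cbOpcodes; } SKETCHRANGE;
static int sketchFindOpcodeRange(SKETCHRANGE const *paRanges, unsigned cRanges,
                                 unsigned offOpcode, unsigned *poffIntoRange)
{
    for (unsigned i = 0; i < cRanges; i++)
    {
        unsigned const offRange = offOpcode - paRanges[i].offOpcodes; /* wraps to a huge value if below the range */
        if (offRange < paRanges[i].cbOpcodes)
        {
            *poffIntoRange = offRange;
            return (int)i;
        }
    }
    return -1;
}
#endif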
364
365/** The VINF_SUCCESS dummy. */
366template<int const a_rcNormal, bool const a_fIsJump>
367DECL_FORCE_INLINE_THROW(uint32_t)
368iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
369 int32_t const offJump)
370{
371 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
372 if (a_rcNormal != VINF_SUCCESS)
373 {
374#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
375 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
376#else
377 RT_NOREF_PV(pCallEntry);
378#endif
379
380 /* As this code returns from the TB any pending register writes must be flushed. */
381 off = iemNativeRegFlushPendingWrites(pReNative, off);
382
383 /*
384 * Use the lookup table for getting to the next TB quickly.
385 * Note! In this code path there can only be one entry at present.
386 */
387 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
388 PCIEMTB const pTbOrg = pReNative->pTbOrg;
389 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
390 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
391
392#if 0
393 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
394 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
395 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
396 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
397 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
398
399 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
400
401#else
402 /* Load the index as argument #1 for the helper call at the given label. */
403 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
404
405 /*
406 * Figure out the physical address of the current instruction and see
407 * whether the next instruction we're about to execute is in the same
408 * page so we can optimistically skip TLB loading.
409 *
410 * - This is safe for all cases in FLAT mode.
411 * - In segmented modes it is complicated, given that a negative
412 * jump may underflow EIP and a forward jump may overflow or run into
413 * CS.LIM and trigger a #GP. The only thing we can get away with
414 * now at compile time is forward jumps w/o CS.LIM checks, since the
415 * lack of CS.LIM checks means we're good for the entire physical page
416 * we're executing on and another 15 bytes before we run into CS.LIM.
417 */
418 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
419# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
420 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
421# endif
422 )
423 {
424 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
425 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
426 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
427 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
428
429 {
430 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
431 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
432
433 /* Load the key lookup flags into the 2nd argument for the helper call.
434 - This is safe wrt CS limit checking since we're only here for FLAT modes.
435 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
436 interrupt shadow.
437 - The NMI inhibiting is more questionable, though... */
438 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
439 * Should we copy it into fExec to simplify this? OTOH, it's just a
440 * couple of extra instructions if EFLAGS are already in a register. */
441 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
442 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
443
444 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
445 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
446 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
447 }
448 }
449 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
450 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
451 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
452#endif
453 }
454 return off;
455}
456
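#if 0
/* Illustrative sketch of how the code above picks the TB exit when
   a_rcNormal == VINF_IEM_REEXEC_BREAK; sketchPickReturnBreakExit is a made-up
   name and the two booleans stand in for the same-page test and the
   idxLastCheckIrqCallNo != UINT32_MAX check. */
static int sketchPickReturnBreakExit(bool fSamePhysPage, bool fIrqCheckedRecently)
{
    if (fSamePhysPage)
        return fIrqCheckedRecently ? kIemNativeLabelType_ReturnBreakViaLookup
                                   : kIemNativeLabelType_ReturnBreakViaLookupWithIrq;
    return fIrqCheckedRecently     ? kIemNativeLabelType_ReturnBreakViaLookupWithTlb
                                   : kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq;
}
#endif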
457
458#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
459 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
460 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
461
462#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
463 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
464 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
465 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
466
467/** Same as iemRegAddToRip64AndFinishingNoFlags. */
468DECL_INLINE_THROW(uint32_t)
469iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
470{
471#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
472# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
473 if (!pReNative->Core.offPc)
474 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
475# endif
476
477 /* Allocate a temporary PC register. */
478 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
479
480 /* Perform the addition and store the result. */
481 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
482 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
483
484 /* Free but don't flush the PC register. */
485 iemNativeRegFreeTmp(pReNative, idxPcReg);
486#endif
487
488#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
489 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
490
491 pReNative->Core.offPc += cbInstr;
492# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
493 off = iemNativePcAdjustCheck(pReNative, off);
494# endif
495 if (pReNative->cCondDepth)
496 off = iemNativeEmitPcWriteback(pReNative, off);
497 else
498 pReNative->Core.cInstrPcUpdateSkipped++;
499#endif
500
501 return off;
502}
503
504
505#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
506 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
507 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
508
509#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
510 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
511 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
512 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
513
514/** Same as iemRegAddToEip32AndFinishingNoFlags. */
515DECL_INLINE_THROW(uint32_t)
516iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
517{
518#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
519# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
520 if (!pReNative->Core.offPc)
521 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
522# endif
523
524 /* Allocate a temporary PC register. */
525 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
526
527 /* Perform the addition and store the result. */
528 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
529 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
530
531 /* Free but don't flush the PC register. */
532 iemNativeRegFreeTmp(pReNative, idxPcReg);
533#endif
534
535#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
536 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
537
538 pReNative->Core.offPc += cbInstr;
539# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
540 off = iemNativePcAdjustCheck(pReNative, off);
541# endif
542 if (pReNative->cCondDepth)
543 off = iemNativeEmitPcWriteback(pReNative, off);
544 else
545 pReNative->Core.cInstrPcUpdateSkipped++;
546#endif
547
548 return off;
549}
550
551
552#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
553 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
554 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
555
556#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
557 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
558 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
559 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
560
561/** Same as iemRegAddToIp16AndFinishingNoFlags. */
562DECL_INLINE_THROW(uint32_t)
563iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
564{
565#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
566# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
567 if (!pReNative->Core.offPc)
568 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
569# endif
570
571 /* Allocate a temporary PC register. */
572 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
573
574 /* Perform the addition and store the result. */
575 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
576 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
577 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
578
579 /* Free but don't flush the PC register. */
580 iemNativeRegFreeTmp(pReNative, idxPcReg);
581#endif
582
583#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
584 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
585
586 pReNative->Core.offPc += cbInstr;
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 off = iemNativePcAdjustCheck(pReNative, off);
589# endif
590 if (pReNative->cCondDepth)
591 off = iemNativeEmitPcWriteback(pReNative, off);
592 else
593 pReNative->Core.cInstrPcUpdateSkipped++;
594#endif
595
596 return off;
597}
598
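#if 0
/* Illustrative sketch of the value the three advance emitters above store in
   cpum.GstCtx.rip for the different PC widths; sketchAdvancePc is a made-up
   name and the delayed-PC-update bookkeeping is left out. */
static uint64_t sketchAdvancePc(uint64_t uRipOld, uint8_t cbInstr, unsigned cPcBits)
{
    switch (cPcBits)
    {
        case 16: return (uint16_t)(uRipOld + cbInstr); /* IP: result is zero-extended (Clear16Up above) */
        case 32: return (uint32_t)(uRipOld + cbInstr); /* EIP: 32-bit add clears the upper half */
        default: return uRipOld + cbInstr;             /* RIP: plain 64-bit add */
    }
}
#endif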
599
600
601/*********************************************************************************************************************************
602* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
603*********************************************************************************************************************************/
604
605#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
606 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
607 (a_enmEffOpSize), pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
609
610#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
611 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
612 (a_enmEffOpSize), pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
615
616#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
618 IEMMODE_16BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
620
621#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
623 IEMMODE_16BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
626
627#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
628 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
629 IEMMODE_64BIT, pCallEntry->idxInstr); \
630 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
631
632#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
633 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
634 IEMMODE_64BIT, pCallEntry->idxInstr); \
635 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
636 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
637
638/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
639 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
640 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
641DECL_INLINE_THROW(uint32_t)
642iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
643 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
644{
645 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
646
647 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
648 off = iemNativeRegFlushPendingWrites(pReNative, off);
649
650#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
651 Assert(pReNative->Core.offPc == 0);
652
653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
654#endif
655
656 /* Allocate a temporary PC register. */
657 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
658
659 /* Perform the addition. */
660 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
661
662 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
663 {
664 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
665 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
666 }
667 else
668 {
669 /* Just truncate the result to 16-bit IP. */
670 Assert(enmEffOpSize == IEMMODE_16BIT);
671 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
672 }
673 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
674
675 /* Free but don't flush the PC register. */
676 iemNativeRegFreeTmp(pReNative, idxPcReg);
677
678 return off;
679}
680
681
682#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
683 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
684 (a_enmEffOpSize), pCallEntry->idxInstr); \
685 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
686
687#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
688 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
689 (a_enmEffOpSize), pCallEntry->idxInstr); \
690 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
691 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
692
693#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
694 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
695 IEMMODE_16BIT, pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
697
698#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
699 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
700 IEMMODE_16BIT, pCallEntry->idxInstr); \
701 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
702 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
703
704#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
705 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
706 IEMMODE_32BIT, pCallEntry->idxInstr); \
707 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
708
709#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
710 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
711 IEMMODE_32BIT, pCallEntry->idxInstr); \
712 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
713 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
714
715/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
716 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
717 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
718DECL_INLINE_THROW(uint32_t)
719iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
720 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
721{
722 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
723
724 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
725 off = iemNativeRegFlushPendingWrites(pReNative, off);
726
727#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
728 Assert(pReNative->Core.offPc == 0);
729
730 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
731#endif
732
733 /* Allocate a temporary PC register. */
734 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
735
736 /* Perform the addition. */
737 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
738
739 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
740 if (enmEffOpSize == IEMMODE_16BIT)
741 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
742
743 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
744/** @todo we can skip this in 32-bit FLAT mode. */
745 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
746
747 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
748
749 /* Free but don't flush the PC register. */
750 iemNativeRegFreeTmp(pReNative, idxPcReg);
751
752 return off;
753}
754
755
756#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
757 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
758 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
759
760#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
761 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
762 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
763 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
764
765#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
766 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
767 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
768
769#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
770 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
771 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
772 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
773
774#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
775 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
776 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
777
778#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
779 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
780 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
781 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
782
783/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
784DECL_INLINE_THROW(uint32_t)
785iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
786 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
787{
788 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
789 off = iemNativeRegFlushPendingWrites(pReNative, off);
790
791#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
792 Assert(pReNative->Core.offPc == 0);
793
794 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
795#endif
796
797 /* Allocate a temporary PC register. */
798 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
799
800 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
801 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
802 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
803 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
804 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
805
806 /* Free but don't flush the PC register. */
807 iemNativeRegFreeTmp(pReNative, idxPcReg);
808
809 return off;
810}
811
812
813
814/*********************************************************************************************************************************
815* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                  *
816*********************************************************************************************************************************/
817
818/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
819#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
820 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
821
822/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
823#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
824 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
825
826/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
827#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
828 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
829
830/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
831 * clears flags. */
832#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
833 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
834 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
835
836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
837 * clears flags. */
838#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
839 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
841
842/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
843 * clears flags. */
844#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
845 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
846 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
847
848#undef IEM_MC_SET_RIP_U16_AND_FINISH
849
850
851/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
852#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
853 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
854
855/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
856#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
857 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
858
859/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
860 * clears flags. */
861#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
862 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
863 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
864
865/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
866 * and clears flags. */
867#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
868 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
869 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
870
871#undef IEM_MC_SET_RIP_U32_AND_FINISH
872
873
874/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
875#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
876 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
877
878/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
879 * and clears flags. */
880#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
881 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
882 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
883
884#undef IEM_MC_SET_RIP_U64_AND_FINISH
885
886
887/** Same as iemRegRipJumpU16AndFinishNoFlags,
888 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
889DECL_INLINE_THROW(uint32_t)
890iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
891 uint8_t idxInstr, uint8_t cbVar)
892{
893 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
894 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
895
896 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
897 off = iemNativeRegFlushPendingWrites(pReNative, off);
898
899#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
900 Assert(pReNative->Core.offPc == 0);
901
902 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
903#endif
904
905 /* Get a register with the new PC loaded from idxVarPc.
906 Note! This ASSUMES that the high bits of the GPR is zeroed. */
907 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
908
909 /* Check limit (may #GP(0) + exit TB). */
910 if (!f64Bit)
911/** @todo we can skip this test in FLAT 32-bit mode. */
912 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
913 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
914 else if (cbVar > sizeof(uint32_t))
915 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
916
917 /* Store the result. */
918 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
919
920 iemNativeVarRegisterRelease(pReNative, idxVarPc);
921 /** @todo implicitly free the variable? */
922
923 return off;
924}
925
926
927
928/*********************************************************************************************************************************
929* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).       *
930*********************************************************************************************************************************/
931
932/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
933 * this below the stack emitters, but then it would not be close to the rest of the PC/RIP handling...). */
934DECL_FORCE_INLINE_THROW(uint32_t)
935iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
936{
937 /* Use16BitSp: */
938#ifdef RT_ARCH_AMD64
939 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
940 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
941#else
942 /* sub regeff, regrsp, #cbMem */
943 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
944 /* and regeff, regeff, #0xffff */
945 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
946 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
947 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
948 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
949#endif
950 return off;
951}
952
953
954DECL_FORCE_INLINE(uint32_t)
955iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
956{
957 /* Use32BitSp: */
958 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
959 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
960 return off;
961}
962
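#if 0
/* Illustrative plain-C sketch of what the two stack-pointer helpers above
   compute; sketchStackPushSp is a made-up name, *puRsp is the incoming RSP
   value and *puEffSp receives the effective address used for the store. */
static void sketchStackPushSp(uint64_t *puRsp, uint64_t *puEffSp, uint8_t cbMem, bool f16BitSp)
{
    if (f16BitSp)
    {
        uint16_t const uNewSp = (uint16_t)(*puRsp - cbMem);  /* only SP (bits 15:0) is decremented */
        *puEffSp = uNewSp;
        *puRsp   = (*puRsp & ~(uint64_t)0xffff) | uNewSp;    /* upper RSP bits are preserved */
    }
    else
    {
        uint32_t const uNewEsp = (uint32_t)(*puRsp - cbMem); /* ESP case: result is zero-extended */
        *puEffSp = uNewEsp;
        *puRsp   = uNewEsp;
    }
}
#endif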
963
964DECL_INLINE_THROW(uint32_t)
965iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
966 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
967{
968 /*
969 * Assert sanity.
970 */
971#ifdef VBOX_STRICT
972 if (RT_BYTE2(cBitsVarAndFlat) != 0)
973 {
974 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
975 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
976 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
977 Assert( pfnFunction
978 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
979 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
980 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
981 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
982 : UINT64_C(0xc000b000a0009000) ));
983 }
984 else
985 Assert( pfnFunction
986 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
987 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
988 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
989 : UINT64_C(0xc000b000a0009000) ));
990#endif
991
992#ifdef VBOX_STRICT
993 /*
994 * Check that the fExec flags we've got make sense.
995 */
996 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
997#endif
998
999 /*
1000 * To keep things simple we have to commit any pending writes first as we
1001 * may end up making calls.
1002 */
1003 /** @todo we could postpone this till we make the call and reload the
1004 * registers after returning from the call. Not sure if that's sensible or
1005 * not, though. */
1006 off = iemNativeRegFlushPendingWrites(pReNative, off);
1007
1008 /*
1009 * First we calculate the new RSP and the effective stack pointer value.
1010 * For 64-bit mode and flat 32-bit these two are the same.
1011 * (Code structure is very similar to that of PUSH)
1012 */
1013 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1014 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1015 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1016 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1017 ? cbMem : sizeof(uint16_t);
1018 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1019 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1020 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1021 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1022 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1023 if (cBitsFlat != 0)
1024 {
1025 Assert(idxRegEffSp == idxRegRsp);
1026 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1027 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1028 if (cBitsFlat == 64)
1029 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1030 else
1031 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1032 }
1033 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1034 {
1035 Assert(idxRegEffSp != idxRegRsp);
1036 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1037 kIemNativeGstRegUse_ReadOnly);
1038#ifdef RT_ARCH_AMD64
1039 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1040#else
1041 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1042#endif
1043 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1044 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1045 offFixupJumpToUseOtherBitSp = off;
1046 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1047 {
1048 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1049 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1050 }
1051 else
1052 {
1053 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1054 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1055 }
1056 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1057 }
1058 /* SpUpdateEnd: */
1059 uint32_t const offLabelSpUpdateEnd = off;
1060
1061 /*
1062 * Okay, now prepare for the TLB lookup and jump to its code (or straight to
1063 * TlbMiss if we're skipping the lookup).
1064 */
1065 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1066 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1067 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1068 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1069 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1070 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1071 : UINT32_MAX;
1072 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1073
1074
1075 if (!TlbState.fSkip)
1076 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1077 else
1078 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1079
1080 /*
1081 * Use16BitSp:
1082 */
1083 if (cBitsFlat == 0)
1084 {
1085#ifdef RT_ARCH_AMD64
1086 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1087#else
1088 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1089#endif
1090 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1091 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1092 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1093 else
1094 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1095 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1096 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1097 }
1098
1099 /*
1100 * TlbMiss:
1101 *
1102 * Call helper to do the pushing.
1103 */
1104 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1105
1106#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1107 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1108#else
1109 RT_NOREF(idxInstr);
1110#endif
1111
1112 /* Save variables in volatile registers. */
1113 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1114 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1115 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1116 | (RT_BIT_32(idxRegPc));
1117 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1118
1119 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1120 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1121 {
1122 /* Swap them using ARG0 as temp register: */
1123 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1124 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1125 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1126 }
1127 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1128 {
1129 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1130 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1131
1132 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1133 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1134 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1135 }
1136 else
1137 {
1138 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1140
1141 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1142 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1143 }
1144
1145 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1146 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1147
1148 /* Done setting up parameters, make the call. */
1149 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1150
1151 /* Restore variables and guest shadow registers to volatile registers. */
1152 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1153 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1154
1155#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1156 if (!TlbState.fSkip)
1157 {
1158 /* end of TlbMiss - Jump to the done label. */
1159 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1160 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1161
1162 /*
1163 * TlbLookup:
1164 */
1165 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1166 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1167
1168 /*
1169 * Emit code to do the actual storing / fetching.
1170 */
1171 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1172# ifdef IEM_WITH_TLB_STATISTICS
1173 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1174 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1175# endif
1176 switch (cbMemAccess)
1177 {
1178 case 2:
1179 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1180 break;
1181 case 4:
1182 if (!fIsIntelSeg)
1183 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1184 else
1185 {
1186                        /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1187                           PUSH FS in real mode, so we have to try to emulate that here.
1188 We borrow the now unused idxReg1 from the TLB lookup code here. */
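                        /* Roughly, the 32-bit value stored below ends up as
                           ((EFLAGS & 0xffff0000 & ~X86_EFL_RAZ_MASK) | IP), relying on the
                           upper half of idxRegPc being zero (see the ASSUMES note below). */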
1189 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1190 kIemNativeGstReg_EFlags);
1191 if (idxRegEfl != UINT8_MAX)
1192 {
1193#ifdef RT_ARCH_AMD64
1194 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1195 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1196 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1197#else
1198 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1199 off, TlbState.idxReg1, idxRegEfl,
1200 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1201#endif
1202 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1203 }
1204 else
1205 {
1206 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1207 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1208 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1209 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1210 }
1211 /* ASSUMES the upper half of idxRegPc is ZERO. */
1212 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1213 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1214 }
1215 break;
1216 case 8:
1217 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1218 break;
1219 default:
1220 AssertFailed();
1221 }
1222
1223 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1224 TlbState.freeRegsAndReleaseVars(pReNative);
1225
1226 /*
1227 * TlbDone:
1228 *
1229 * Commit the new RSP value.
1230 */
1231 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1232 }
1233#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1234
1235#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1236 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1237#endif
1238 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1239 if (idxRegEffSp != idxRegRsp)
1240 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1241
1242 return off;
1243}
1244
1245
1246/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1247#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1248 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1249
1250/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1251 * clears flags. */
1252#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1253 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1254 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1255
1256/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1257#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1258 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1259
1260/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1261 * clears flags. */
1262#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1263 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1264 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1265
1266#undef IEM_MC_IND_CALL_U16_AND_FINISH
1267
1268
1269/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1270#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1271 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1272
1273/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1274 * clears flags. */
1275#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1276 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1277 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1278
1279#undef IEM_MC_IND_CALL_U32_AND_FINISH
1280
1281
1282/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1283 * an extra parameter, for use in 64-bit code. */
1284#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1285 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1286
1287
1288/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1289 * an extra parameter, for use in 64-bit code and we need to check and clear
1290 * flags. */
1291#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1292 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1293 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1294
1295#undef IEM_MC_IND_CALL_U64_AND_FINISH
1296
1297/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1298 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1299DECL_INLINE_THROW(uint32_t)
1300iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1301 uint8_t idxInstr, uint8_t cbVar)
1302{
1303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1304 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1305
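    /* Conceptual sketch of the emitted sequence (illustrative pseudo-code only; the names are not real symbols):
           uNewPc = value of idxVarPc (high bits assumed zero);
           check uNewPc against the CS limit / canonicality;    // may raise #GP(0) and exit the TB
           push(PC + cbInstr);                                  // 16-, 32- or 64-bit return address
           PC = uNewPc;                                                                              */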
1306 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1307 off = iemNativeRegFlushPendingWrites(pReNative, off);
1308
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311
1312 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1313#endif
1314
1315 /* Get a register with the new PC loaded from idxVarPc.
1316 Note! This ASSUMES that the high bits of the GPR is zeroed. */
1317 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1318
1319 /* Check limit (may #GP(0) + exit TB). */
1320 if (!f64Bit)
1321/** @todo we can skip this test in FLAT 32-bit mode. */
1322 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1323 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1324 else if (cbVar > sizeof(uint32_t))
1325 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1326
1327#if 1
1328 /* Allocate a temporary PC register, we don't want it shadowed. */
1329 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1330 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1331#else
1332 /* Allocate a temporary PC register. */
1333 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1334 true /*fNoVolatileRegs*/);
1335#endif
1336
1337 /* Perform the addition and push the variable to the guest stack. */
1338 /** @todo Flat variants for PC32 variants. */
1339 switch (cbVar)
1340 {
1341 case sizeof(uint16_t):
1342 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1343 /* Truncate the result to 16-bit IP. */
1344 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1345 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1346 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1347 break;
1348 case sizeof(uint32_t):
1349 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1350 /** @todo In FLAT mode we can use the flat variant. */
1351 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1352 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1353 break;
1354 case sizeof(uint64_t):
1355 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1356 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1357 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1358 break;
1359 default:
1360 AssertFailed();
1361 }
1362
1363 /* RSP got changed, so do this again. */
1364 off = iemNativeRegFlushPendingWrites(pReNative, off);
1365
1366 /* Store the result. */
1367 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1368
1369#if 1
1370 /* Need to transfer the shadow information to the new RIP register. */
1371 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1372#else
1373 /* Sync the new PC. */
1374 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1375#endif
1376 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1377 iemNativeRegFreeTmp(pReNative, idxPcReg);
1378    /** @todo implicitly free the variable? */
1379
1380 return off;
1381}
1382
1383
1384/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1385 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1386#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1387 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1388
1389/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1390 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1391 * flags. */
1392#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1393 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1394 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1395
1396/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1397 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1398#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1399 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1400
1401/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1402 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1403 * flags. */
1404#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1405 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1406 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1407
1408/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1409 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1410#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1411 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1412
1413/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1414 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1415 * flags. */
1416#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1417 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1418 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1419
1420#undef IEM_MC_REL_CALL_S16_AND_FINISH
1421
1422/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1423 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1424DECL_INLINE_THROW(uint32_t)
1425iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1426 uint8_t idxInstr)
1427{
1428 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1429 off = iemNativeRegFlushPendingWrites(pReNative, off);
1430
1431#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1432 Assert(pReNative->Core.offPc == 0);
1433
1434 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1435#endif
1436
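    /* Conceptual sketch (illustrative pseudo-code only):
           uRetAddr = (uint16_t)(IP + cbInstr);
           uNewIp   = (uint16_t)(uRetAddr + offDisp);
           check uNewIp against the CS limit;                   // may raise #GP(0) and exit the TB
           push16(uRetAddr);
           IP = uNewIp;                                                                              */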
1437 /* Allocate a temporary PC register. */
1438 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1439 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1440 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1441
1442 /* Calculate the new RIP. */
1443 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1444 /* Truncate the result to 16-bit IP. */
1445 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1446 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1447 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1448
1449 /* Truncate the result to 16-bit IP. */
1450 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1451
1452 /* Check limit (may #GP(0) + exit TB). */
1453 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1454
1455    /* Push the return address to the guest stack. */
1456 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1457 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1458
1459 /* RSP got changed, so flush again. */
1460 off = iemNativeRegFlushPendingWrites(pReNative, off);
1461
1462 /* Store the result. */
1463 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1464
1465 /* Need to transfer the shadow information to the new RIP register. */
1466 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1467 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1468 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1469
1470 return off;
1471}
1472
1473
1474/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1475 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1476#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1477 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1478
1479/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1480 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1481 * flags. */
1482#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1483 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1484 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1485
1486#undef IEM_MC_REL_CALL_S32_AND_FINISH
1487
1488/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1489 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1490DECL_INLINE_THROW(uint32_t)
1491iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1492 uint8_t idxInstr)
1493{
1494 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1495 off = iemNativeRegFlushPendingWrites(pReNative, off);
1496
1497#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1498 Assert(pReNative->Core.offPc == 0);
1499
1500 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1501#endif
1502
1503 /* Allocate a temporary PC register. */
1504 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1505 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1506 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1507
1508 /* Update the EIP to get the return address. */
1509 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1510
1511    /* Load address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's beyond it. */
1512 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1513 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1514 /** @todo we can skip this test in FLAT 32-bit mode. */
1515 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1516
1517    /* Push the return address to the guest stack. */
1518 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1519 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1520 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1521
1522 /* RSP got changed, so do this again. */
1523 off = iemNativeRegFlushPendingWrites(pReNative, off);
1524
1525 /* Store the result. */
1526 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1527
1528 /* Need to transfer the shadow information to the new RIP register. */
1529 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1530 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1531 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1532
1533 return off;
1534}
1535
1536
1537/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1538 * an extra parameter, for use in 64-bit code. */
1539#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1540 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1541
1542/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1543 * an extra parameter, for use in 64-bit code and we need to check and clear
1544 * flags. */
1545#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1546 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1548
1549#undef IEM_MC_REL_CALL_S64_AND_FINISH
1550
1551/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1552 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1553DECL_INLINE_THROW(uint32_t)
1554iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1555 uint8_t idxInstr)
1556{
1557 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1558 off = iemNativeRegFlushPendingWrites(pReNative, off);
1559
1560#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1561 Assert(pReNative->Core.offPc == 0);
1562
1563 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1564#endif
1565
1566 /* Allocate a temporary PC register. */
1567 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1568 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1569 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1570
1571 /* Update the RIP to get the return address. */
1572 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1573
1574 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1575 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1576 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1578
1579    /* Push the return address to the guest stack. */
1580 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1581 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1582
1583 /* RSP got changed, so do this again. */
1584 off = iemNativeRegFlushPendingWrites(pReNative, off);
1585
1586 /* Store the result. */
1587 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1588
1589 /* Need to transfer the shadow information to the new RIP register. */
1590 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1591 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1592 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1593
1594 return off;
1595}
1596
1597
1598/*********************************************************************************************************************************
1599* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).        *
1600*********************************************************************************************************************************/
1601
1602DECL_FORCE_INLINE_THROW(uint32_t)
1603iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1604 uint16_t cbPopAdd, uint8_t idxRegTmp)
1605{
1606 /* Use16BitSp: */
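    /* Conceptually: EffSp = RSP & 0xffff; SP = (uint16_t)(SP + cbMem + cbPopAdd),
       leaving RSP bits 63:16 untouched. */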
1607#ifdef RT_ARCH_AMD64
1608 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1609 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1610 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1611 RT_NOREF(idxRegTmp);
1612#elif defined(RT_ARCH_ARM64)
1613 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1614 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1615    /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1616 uint16_t const cbCombined = cbMem + cbPopAdd;
1617 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1618 if (cbCombined >= RT_BIT_32(12))
1619 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1620 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1621 /* and tmp, tmp, #0xffff */
1622 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1623 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1624    /* bfi regrsp, regtmp, #0, #16 - copies bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1625 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1626#else
1627# error "Port me"
1628#endif
1629 return off;
1630}
1631
1632
1633DECL_FORCE_INLINE_THROW(uint32_t)
1634iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1635 uint16_t cbPopAdd)
1636{
1637 /* Use32BitSp: */
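    /* Conceptually: EffSp = ESP; RSP = (uint32_t)(ESP + cbMem + cbPopAdd). */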
1638 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1639 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1640 return off;
1641}
1642
1643
1644/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1645#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1646 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1647
1648/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1649#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1650 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1651
1652/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1653#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1654 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1655
1656/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1657 * clears flags. */
1658#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1659 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1661
1662/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1663 * clears flags. */
1664#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1665 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1667
1668/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1669 * clears flags. */
1670#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1671 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1672 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1673
1674/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1675DECL_INLINE_THROW(uint32_t)
1676iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1677 IEMMODE enmEffOpSize, uint8_t idxInstr)
1678{
1679 RT_NOREF(cbInstr);
1680
1681#ifdef VBOX_STRICT
1682 /*
1683 * Check that the fExec flags we've got make sense.
1684 */
1685 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1686#endif
1687
1688 /*
1689 * To keep things simple we have to commit any pending writes first as we
1690 * may end up making calls.
1691 */
1692 off = iemNativeRegFlushPendingWrites(pReNative, off);
1693
1694 /*
1695 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1696 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1697 * directly as the effective stack pointer.
1698 * (Code structure is very similar to that of PUSH)
1699 *
1700 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1701 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1702 * aren't commonly used (or useful) and thus not in need of optimizing.
1703 *
1704     * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
1705     * as the shadowed register would otherwise remain modified even if the return address raises a \#GP(0)
1706     * for being outside the CS limit, causing a wrong stack pointer value in the guest (see
1707     * the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing is transferred
1708     * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1709 */
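    /* Rough sketch of the emitted sequence (illustrative pseudo-code only, ignoring the exact
       ordering of the stack pointer arithmetic):
           uNewIp = read(SS:EffSp, cbMem);                      // TLB lookup or helper call
           check uNewIp against the CS limit / canonicality;    // may raise #GP(0) and exit the TB
           RSP    = EffSp + cbMem + cbPop;                      // SP/ESP/RSP width per SS.D / mode
           RIP    = uNewIp;                                                                         */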
1710 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1711 ? sizeof(uint64_t)
1712 : enmEffOpSize == IEMMODE_32BIT
1713 ? sizeof(uint32_t)
1714 : sizeof(uint16_t);
1715 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1716 uintptr_t const pfnFunction = fFlat
1717 ? enmEffOpSize == IEMMODE_64BIT
1718 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1719 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1720 : enmEffOpSize == IEMMODE_32BIT
1721 ? (uintptr_t)iemNativeHlpStackFetchU32
1722 : (uintptr_t)iemNativeHlpStackFetchU16;
1723 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1724 fFlat ? kIemNativeGstRegUse_ForUpdate
1725 : kIemNativeGstRegUse_Calculation,
1726 true /*fNoVolatileRegs*/);
1727 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1728 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1729 * will be the resulting register value. */
1730 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1731
1732 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1733 if (fFlat)
1734 Assert(idxRegEffSp == idxRegRsp);
1735 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1736 {
1737 Assert(idxRegEffSp != idxRegRsp);
1738 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1739 kIemNativeGstRegUse_ReadOnly);
1740#ifdef RT_ARCH_AMD64
1741 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1742#else
1743 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1744#endif
1745 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1746 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1747 offFixupJumpToUseOtherBitSp = off;
1748 if (enmEffOpSize == IEMMODE_32BIT)
1749 {
1750 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1751 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1752 }
1753 else
1754 {
1755 Assert(enmEffOpSize == IEMMODE_16BIT);
1756 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1757 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1758 idxRegMemResult);
1759 }
1760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1761 }
1762 /* SpUpdateEnd: */
1763 uint32_t const offLabelSpUpdateEnd = off;
1764
1765 /*
1766 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1767 * we're skipping lookup).
1768 */
1769 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1770 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1771 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1772 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1773 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1774 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1775 : UINT32_MAX;
1776
1777 if (!TlbState.fSkip)
1778 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1779 else
1780 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1781
1782 /*
1783 * Use16BitSp:
1784 */
1785 if (!fFlat)
1786 {
1787#ifdef RT_ARCH_AMD64
1788 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1789#else
1790 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1791#endif
1792 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1793 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1794 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1795 idxRegMemResult);
1796 else
1797 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1798 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1800 }
1801
1802 /*
1803 * TlbMiss:
1804 *
1805     * Call helper to do the popping.
1806 */
1807 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1808
1809#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1810 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1811#else
1812 RT_NOREF(idxInstr);
1813#endif
1814
1815 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1816 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1817 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1818 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1819
1820
1821 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1822 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1823 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1824
1825 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1827
1828 /* Done setting up parameters, make the call. */
1829 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1830
1831 /* Move the return register content to idxRegMemResult. */
1832 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1833 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1834
1835 /* Restore variables and guest shadow registers to volatile registers. */
1836 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1837 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1838
1839#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1840 if (!TlbState.fSkip)
1841 {
1842 /* end of TlbMiss - Jump to the done label. */
1843 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1844 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1845
1846 /*
1847 * TlbLookup:
1848 */
1849 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1850 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1851
1852 /*
1853     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1854 */
1855 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1856# ifdef IEM_WITH_TLB_STATISTICS
1857 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1858 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1859# endif
1860 switch (cbMem)
1861 {
1862 case 2:
1863 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1864 break;
1865 case 4:
1866 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1867 break;
1868 case 8:
1869 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1870 break;
1871 default:
1872 AssertFailed();
1873 }
1874
1875 TlbState.freeRegsAndReleaseVars(pReNative);
1876
1877 /*
1878 * TlbDone:
1879 *
1880 * Set the new RSP value (FLAT accesses needs to calculate it first) and
1881 * commit the popped register value.
1882 */
1883 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1884 }
1885#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1886
1887 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1888 if (!f64Bit)
1889/** @todo we can skip this test in FLAT 32-bit mode. */
1890 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1891 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1892 else if (enmEffOpSize == IEMMODE_64BIT)
1893 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1894
1895 /* Complete RSP calculation for FLAT mode. */
1896 if (idxRegEffSp == idxRegRsp)
1897 {
1898 if (enmEffOpSize == IEMMODE_64BIT)
1899 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1900 else
1901 {
1902 Assert(enmEffOpSize == IEMMODE_32BIT);
1903 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1904 }
1905 }
1906
1907 /* Commit the result and clear any current guest shadows for RIP. */
1908 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1909 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1910 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1911
1912 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1913 if (!fFlat)
1914 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1915
1916 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1917 if (idxRegEffSp != idxRegRsp)
1918 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1919 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1920 return off;
1921}
1922
1923
1924/*********************************************************************************************************************************
1925* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1926*********************************************************************************************************************************/
1927
1928#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1929 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1930
1931/**
1932 * Emits code to check if a \#NM exception should be raised.
1933 *
1934 * @returns New code buffer offset, UINT32_MAX on failure.
1935 * @param pReNative The native recompile state.
1936 * @param off The code buffer offset.
1937 * @param idxInstr The current instruction.
1938 */
1939DECL_INLINE_THROW(uint32_t)
1940iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1941{
1942#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1943 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1944
1945 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1946 {
1947#endif
1948 /*
1949 * Make sure we don't have any outstanding guest register writes as we may
1950         * raise an #NM and all guest registers must be up to date in CPUMCTX.
1951 */
1952 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1953 off = iemNativeRegFlushPendingWrites(pReNative, off);
1954
1955#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1956 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1957#else
1958 RT_NOREF(idxInstr);
1959#endif
1960
1961 /* Allocate a temporary CR0 register. */
1962 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
1963 kIemNativeGstRegUse_ReadOnly);
1964
1965 /*
1966         * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
1967 * return raisexcpt();
1968 */
1969 /* Test and jump. */
1970 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
1971 kIemNativeLabelType_RaiseNm);
1972
1973 /* Free but don't flush the CR0 register. */
1974 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1975
1976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1977 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1978 }
1979 else
1980 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1981#endif
1982
1983 return off;
1984}
1985
1986
1987#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1988 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1989
1990/**
1991 * Emits code to check if a \#NM exception should be raised.
1992 *
1993 * @returns New code buffer offset, UINT32_MAX on failure.
1994 * @param pReNative The native recompile state.
1995 * @param off The code buffer offset.
1996 * @param idxInstr The current instruction.
1997 */
1998DECL_INLINE_THROW(uint32_t)
1999iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2000{
2001#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2002 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2003
2004 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2005 {
2006#endif
2007 /*
2008 * Make sure we don't have any outstanding guest register writes as we may
2009         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2010 */
2011 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2012 off = iemNativeRegFlushPendingWrites(pReNative, off);
2013
2014#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2015 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2016#else
2017 RT_NOREF(idxInstr);
2018#endif
2019
2020 /* Allocate a temporary CR0 register. */
2021 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2022 kIemNativeGstRegUse_Calculation);
2023
2024 /*
2025         * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2026 * return raisexcpt();
2027 */
2028 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2029 /* Test and jump. */
2030 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2031 kIemNativeLabelType_RaiseNm);
2032
2033 /* Free the CR0 register. */
2034 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2035
2036#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2037 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2038 }
2039 else
2040 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2041#endif
2042
2043 return off;
2044}
2045
2046
2047#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2048 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2049
2050/**
2051 * Emits code to check if a \#MF exception should be raised.
2052 *
2053 * @returns New code buffer offset, UINT32_MAX on failure.
2054 * @param pReNative The native recompile state.
2055 * @param off The code buffer offset.
2056 * @param idxInstr The current instruction.
2057 */
2058DECL_INLINE_THROW(uint32_t)
2059iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2060{
2061 /*
2062 * Make sure we don't have any outstanding guest register writes as we may
2063     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2064 */
2065 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2066 off = iemNativeRegFlushPendingWrites(pReNative, off);
2067
2068#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2069 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2070#else
2071 RT_NOREF(idxInstr);
2072#endif
2073
2074 /* Allocate a temporary FSW register. */
2075 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2076 kIemNativeGstRegUse_ReadOnly);
2077
2078 /*
2079     * if ((FSW & X86_FSW_ES) != 0)
2080 * return raisexcpt();
2081 */
2082 /* Test and jump. */
2083 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2084
2085 /* Free but don't flush the FSW register. */
2086 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2087
2088 return off;
2089}
2090
2091
2092#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2093 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2094
2095/**
2096 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2097 *
2098 * @returns New code buffer offset, UINT32_MAX on failure.
2099 * @param pReNative The native recompile state.
2100 * @param off The code buffer offset.
2101 * @param idxInstr The current instruction.
2102 */
2103DECL_INLINE_THROW(uint32_t)
2104iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2105{
2106#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2107 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2108
2109 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2110 {
2111#endif
2112 /*
2113 * Make sure we don't have any outstanding guest register writes as we may
2114         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2115 */
2116 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2117 off = iemNativeRegFlushPendingWrites(pReNative, off);
2118
2119#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2120 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2121#else
2122 RT_NOREF(idxInstr);
2123#endif
2124
2125 /* Allocate a temporary CR0 and CR4 register. */
2126 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2127 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2128 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2129
2130 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2131#ifdef RT_ARCH_AMD64
2132 /*
2133 * We do a modified test here:
2134         *      if (!((((cr4 & X86_CR4_OSFXSR) | cr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)) ^ X86_CR4_OSFXSR)) { likely }
2135         *      else                                                                                                   { goto RaiseSseRelated; }
2136         * This ASSUMES that CR0[bit 9] is always zero.  This is the case on
2137         * all targets except the 386, which doesn't support SSE anyway, so this
2138         * should be a safe assumption.
2139 */
2140 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2141 //pCodeBuf[off++] = 0xcc;
2142 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2143 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2144 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2145 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2146 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2147 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2148
2149#elif defined(RT_ARCH_ARM64)
2150 /*
2151 * We do a modified test here:
2152 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2153 * else { goto RaiseSseRelated; }
2154 */
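        /* In other words, execution may only continue when CR0.EM = 0, CR0.TS = 0 and CR4.OSFXSR = 1. */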
2155 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2156 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2157 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2158 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2159 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2160 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2161 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2162 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2163 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2164 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2165 kIemNativeLabelType_RaiseSseRelated);
2166
2167#else
2168# error "Port me!"
2169#endif
2170
2171 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2172 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2173 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2174 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2175
2176#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2177 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2178 }
2179 else
2180 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2181#endif
2182
2183 return off;
2184}
2185
2186
2187#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2188 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2189
2190/**
2191 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2192 *
2193 * @returns New code buffer offset, UINT32_MAX on failure.
2194 * @param pReNative The native recompile state.
2195 * @param off The code buffer offset.
2196 * @param idxInstr The current instruction.
2197 */
2198DECL_INLINE_THROW(uint32_t)
2199iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2200{
2201#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2202 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2203
2204 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2205 {
2206#endif
2207 /*
2208 * Make sure we don't have any outstanding guest register writes as we may
2209         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2210 */
2211 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2212 off = iemNativeRegFlushPendingWrites(pReNative, off);
2213
2214#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2215 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2216#else
2217 RT_NOREF(idxInstr);
2218#endif
2219
2220 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2221 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2222 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2223 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2224 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2225
2226 /*
2227 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2228 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2229 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2230 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2231 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2232 * { likely }
2233 * else { goto RaiseAvxRelated; }
2234 */
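        /* In other words, execution may only continue when XCR0.SSE = 1, XCR0.YMM = 1,
           CR4.OSXSAVE = 1 and CR0.TS = 0. */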
2235#ifdef RT_ARCH_AMD64
2236 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2237 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2238 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2239 ^ 0x1a) ) { likely }
2240 else { goto RaiseAvxRelated; } */
2241 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2242 //pCodeBuf[off++] = 0xcc;
2243 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2244 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2245 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2246 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2247 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2248 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2249 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2250 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2251 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2252 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2253 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2254
2255#elif defined(RT_ARCH_ARM64)
2256 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2257 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2258 else { goto RaiseAvxRelated; } */
2259 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2260 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2261 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2262 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2263 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2264 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2265 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2266 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2267 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2268 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2269 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2270 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2271 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2272 kIemNativeLabelType_RaiseAvxRelated);
2273
2274#else
2275# error "Port me!"
2276#endif
2277
2278 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2279 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2280 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2281 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2282#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2283 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2284 }
2285 else
2286 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2287#endif
2288
2289 return off;
2290}
2291
2292
2293#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2294#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
2295 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
2296
2297/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
2298DECL_INLINE_THROW(uint32_t)
2299iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2300{
2301 /*
2302 * Make sure we don't have any outstanding guest register writes as we may
2303     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
2304 */
2305 off = iemNativeRegFlushPendingWrites(pReNative, off);
2306
2307#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2308 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2309#else
2310 RT_NOREF(idxInstr);
2311#endif
2312
2313 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
2314 kIemNativeGstRegUse_ReadOnly);
2315 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2316
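    /* The sequence below computes ~((MXCSR & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT) & MXCSR
       and raises the exception if any X86_MXCSR_XCPT_FLAGS bit survives, i.e. when an exception flag
       is set whose corresponding mask bit is clear (an unmasked SIMD floating-point exception). */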
2317 /* mov tmp, varmxcsr */
2318 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2319 /* tmp &= X86_MXCSR_XCPT_MASK */
2320 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
2321 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
2322 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
2323 /* tmp = ~tmp */
2324 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
2325 /* tmp &= mxcsr */
2326 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2327 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
2328 kIemNativeLabelType_RaiseSseAvxFpRelated);
2329
2330 /* Free but don't flush the MXCSR register. */
2331 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
2332 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2333
2334 return off;
2335}
2336#endif
2337
2338
2339#define IEM_MC_RAISE_DIVIDE_ERROR() \
2340 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2341
2342/**
2343 * Emits code to raise a \#DE.
2344 *
2345 * @returns New code buffer offset, UINT32_MAX on failure.
2346 * @param pReNative The native recompile state.
2347 * @param off The code buffer offset.
2348 * @param idxInstr The current instruction.
2349 */
2350DECL_INLINE_THROW(uint32_t)
2351iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2352{
2353 /*
2354     * Make sure we don't have any outstanding guest register writes as we may raise a #DE and all guest registers must be up to date in CPUMCTX.
2355 */
2356 off = iemNativeRegFlushPendingWrites(pReNative, off);
2357
2358#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2359 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2360#else
2361 RT_NOREF(idxInstr);
2362#endif
2363
2364 /* raise \#DE exception unconditionally. */
2365 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2366}
2367
2368
2369#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2370 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2371
2372/**
2373 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2374 *
2375 * @returns New code buffer offset, UINT32_MAX on failure.
2376 * @param pReNative The native recompile state.
2377 * @param off The code buffer offset.
2378 * @param idxInstr The current instruction.
2379 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2380 * @param cbAlign The alignment in bytes to check against.
2381 */
2382DECL_INLINE_THROW(uint32_t)
2383iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2384 uint8_t idxVarEffAddr, uint8_t cbAlign)
2385{
2386 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2387 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2388
2389 /*
2390 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2391 */
2392 off = iemNativeRegFlushPendingWrites(pReNative, off);
2393
2394#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2395 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2396#else
2397 RT_NOREF(idxInstr);
2398#endif
2399
2400 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2401
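 /* Any of the low (cbAlign - 1) bits being set means the address is misaligned
    (cbAlign is expected to be a power of two here). */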
2402 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2403 kIemNativeLabelType_RaiseGp0);
2404
2405 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2406 return off;
2407}
2408
2409
2410/*********************************************************************************************************************************
2411* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2412*********************************************************************************************************************************/
2413
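/*
 * Rough sketch of how these emitters cooperate (illustrative only, not lifted from an
 * actual MC block): an IEM_MC_IF_XXX macro pushes a condition entry, emits the test
 * plus a jump to the 'else' label and snapshots the register/variable state;
 * IEM_MC_ELSE() jumps to 'endif', defines the 'else' label and restores the snapshot;
 * IEM_MC_ENDIF() reconciles the two states and defines the 'endif' label.  So,
 * schematically:
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...                                     // if-block body
 *      } IEM_MC_ELSE() {
 *          ...                                     // else-block body
 *      } IEM_MC_ENDIF();
 *
 * expands into calls to iemNativeEmitIfEflagsBitSet, iemNativeEmitElse and
 * iemNativeEmitEndIf, in that order.
 */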
2414/**
2415 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2416 *
2417 * @returns Pointer to the condition stack entry on success, NULL on failure
2418 * (too many nestings)
2419 */
2420DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2421{
2422#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2423 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2424#endif
2425
2426 uint32_t const idxStack = pReNative->cCondDepth;
2427 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2428
2429 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2430 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2431
2432 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2433 pEntry->fInElse = false;
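 /* Create the 'else' and 'endif' labels now but leave them undefined (UINT32_MAX);
    iemNativeEmitElse/iemNativeEmitEndIf will pin them to real code offsets later. */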
2434 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2435 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2436
2437 return pEntry;
2438}
2439
2440
2441/**
2442 * Start of the if-block, snapshotting the register and variable state.
2443 */
2444DECL_INLINE_THROW(void)
2445iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2446{
2447 Assert(offIfBlock != UINT32_MAX);
2448 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2449 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2450 Assert(!pEntry->fInElse);
2451
2452 /* Define the start of the IF block if requested or for disassembly purposes. */
2453 if (idxLabelIf != UINT32_MAX)
2454 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2455#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2456 else
2457 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2458#else
2459 RT_NOREF(offIfBlock);
2460#endif
2461
2462#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2463 Assert(pReNative->Core.offPc == 0);
2464#endif
2465
2466 /* Copy the initial state so we can restore it in the 'else' block. */
2467 pEntry->InitialState = pReNative->Core;
2468}
2469
2470
2471#define IEM_MC_ELSE() } while (0); \
2472 off = iemNativeEmitElse(pReNative, off); \
2473 do {
2474
2475/** Emits code related to IEM_MC_ELSE. */
2476DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2477{
2478 /* Check sanity and get the conditional stack entry. */
2479 Assert(off != UINT32_MAX);
2480 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2481 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2482 Assert(!pEntry->fInElse);
2483
2484#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2485 /* Writeback any dirty shadow registers. */
2486 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2487 * in one of the branches and leave guest registers already dirty before the start of the if
2488 * block alone. */
2489 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2490#endif
2491
2492 /* Jump to the endif */
2493 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2494
2495 /* Define the else label and enter the else part of the condition. */
2496 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2497 pEntry->fInElse = true;
2498
2499#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2500 Assert(pReNative->Core.offPc == 0);
2501#endif
2502
2503 /* Snapshot the core state so we can do a merge at the endif and restore
2504 the snapshot we took at the start of the if-block. */
2505 pEntry->IfFinalState = pReNative->Core;
2506 pReNative->Core = pEntry->InitialState;
2507
2508 return off;
2509}
2510
2511
2512#define IEM_MC_ENDIF() } while (0); \
2513 off = iemNativeEmitEndIf(pReNative, off)
2514
2515/** Emits code related to IEM_MC_ENDIF. */
2516DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2517{
2518 /* Check sanity and get the conditional stack entry. */
2519 Assert(off != UINT32_MAX);
2520 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2521 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2522
2523#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2524 Assert(pReNative->Core.offPc == 0);
2525#endif
2526#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2527 /* Writeback any dirty shadow registers (else branch). */
2528 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2529 * in one of the branches and leave guest registers already dirty before the start of the if
2530 * block alone. */
2531 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2532#endif
2533
2534 /*
2535 * Now we have to find common ground with the core state at the end of the
2536 * other branch. Use the smallest common denominator and just drop anything
2537 * that isn't the same in both states.
2538 */
2539 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2540 * which is why we're doing this at the end of the else-block.
2541 * But we'd need more info about future for that to be worth the effort. */
2542 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2543#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2544 Assert( pOther->bmGstRegShadowDirty == 0
2545 && pReNative->Core.bmGstRegShadowDirty == 0);
2546#endif
2547
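 /* Reconciliation strategy: (1) drop guest shadow copies that aren't identical in
    both states, (2) drop register associations or whole variables the two states
    disagree on, and (3) require the final host register allocation bitmaps to
    match exactly. */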
2548 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2549 {
2550 /* shadow guest stuff first. */
2551 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2552 if (fGstRegs)
2553 {
2554 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2555 do
2556 {
2557 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2558 fGstRegs &= ~RT_BIT_64(idxGstReg);
2559
2560 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2561 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2562 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2563 {
2564 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2565 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2566
2567#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2568 /* Writeback any dirty shadow registers we are about to unshadow. */
2569 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2570#endif
2571 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2572 }
2573 } while (fGstRegs);
2574 }
2575 else
2576 {
2577 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2578#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2579 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2580#endif
2581 }
2582
2583 /* Check variables next. For now we must require them to be identical
2584 or stuff we can recreate. */
2585 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2586 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2587 if (fVars)
2588 {
2589 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2590 do
2591 {
2592 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2593 fVars &= ~RT_BIT_32(idxVar);
2594
2595 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2596 {
2597 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2598 continue;
2599 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2600 {
2601 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2602 if (idxHstReg != UINT8_MAX)
2603 {
2604 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2605 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2606 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2607 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2608 }
2609 continue;
2610 }
2611 }
2612 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2613 continue;
2614
2615 /* Irreconcilable, so drop it. */
2616 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2617 if (idxHstReg != UINT8_MAX)
2618 {
2619 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2620 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2621 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2622 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2623 }
2624 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2625 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2626 } while (fVars);
2627 }
2628
2629 /* Finally, check that the host register allocations matches. */
2630 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2631 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2632 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2633 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2634 }
2635
2636 /*
2637 * Define the endif label and maybe the else one if we're still in the 'if' part.
2638 */
2639 if (!pEntry->fInElse)
2640 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2641 else
2642 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2643 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2644
2645 /* Pop the conditional stack. */
2646 pReNative->cCondDepth -= 1;
2647
2648 return off;
2649}
2650
2651
2652#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2653 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2654 do {
2655
2656/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2657DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2658{
2659 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2660 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2661
2662 /* Get the eflags. */
2663 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2664 kIemNativeGstRegUse_ReadOnly);
2665
2666 /* Test and jump. */
2667 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2668
2669 /* Free but don't flush the EFlags register. */
2670 iemNativeRegFreeTmp(pReNative, idxEflReg);
2671
2672 /* Make a copy of the core state now as we start the if-block. */
2673 iemNativeCondStartIfBlock(pReNative, off);
2674
2675 return off;
2676}
2677
2678
2679#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2680 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2681 do {
2682
2683/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2684DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2685{
2686 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2687 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2688
2689 /* Get the eflags. */
2690 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2691 kIemNativeGstRegUse_ReadOnly);
2692
2693 /* Test and jump. */
2694 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2695
2696 /* Free but don't flush the EFlags register. */
2697 iemNativeRegFreeTmp(pReNative, idxEflReg);
2698
2699 /* Make a copy of the core state now as we start the if-block. */
2700 iemNativeCondStartIfBlock(pReNative, off);
2701
2702 return off;
2703}
2704
2705
2706#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2707 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2708 do {
2709
2710/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2711DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2712{
2713 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2714 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2715
2716 /* Get the eflags. */
2717 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2718 kIemNativeGstRegUse_ReadOnly);
2719
2720 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2721 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2722
2723 /* Test and jump. */
2724 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2725
2726 /* Free but don't flush the EFlags register. */
2727 iemNativeRegFreeTmp(pReNative, idxEflReg);
2728
2729 /* Make a copy of the core state now as we start the if-block. */
2730 iemNativeCondStartIfBlock(pReNative, off);
2731
2732 return off;
2733}
2734
2735
2736#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2737 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2738 do {
2739
2740/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2741DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2742{
2743 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2744 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2745
2746 /* Get the eflags. */
2747 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2748 kIemNativeGstRegUse_ReadOnly);
2749
2750 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2751 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2752
2753 /* Test and jump. */
2754 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2755
2756 /* Free but don't flush the EFlags register. */
2757 iemNativeRegFreeTmp(pReNative, idxEflReg);
2758
2759 /* Make a copy of the core state now as we start the if-block. */
2760 iemNativeCondStartIfBlock(pReNative, off);
2761
2762 return off;
2763}
2764
2765
2766#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2767 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2768 do {
2769
2770#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2771 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2772 do {
2773
2774/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2775DECL_INLINE_THROW(uint32_t)
2776iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2777 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2778{
2779 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2780 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2781
2782 /* Get the eflags. */
2783 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2784 kIemNativeGstRegUse_ReadOnly);
2785
2786 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2787 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2788
2789 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2790 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2791 Assert(iBitNo1 != iBitNo2);
2792
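 /* Strategy: isolate fBit1, shift it into fBit2's bit position and XOR the result
    with EFLAGS; bit iBitNo2 of the temporary is then set exactly when the two flags
    differ, which is what the final test-and-jump checks. */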
2793#ifdef RT_ARCH_AMD64
2794 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2795
2796 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2797 if (iBitNo1 > iBitNo2)
2798 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2799 else
2800 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2801 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2802
2803#elif defined(RT_ARCH_ARM64)
2804 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2805 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2806
2807 /* and tmpreg, eflreg, #1<<iBitNo1 */
2808 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2809
2810 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2811 if (iBitNo1 > iBitNo2)
2812 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2813 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2814 else
2815 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2816 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2817
2818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2819
2820#else
2821# error "Port me"
2822#endif
2823
2824 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2825 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2826 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2827
2828 /* Free but don't flush the EFlags and tmp registers. */
2829 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2830 iemNativeRegFreeTmp(pReNative, idxEflReg);
2831
2832 /* Make a copy of the core state now as we start the if-block. */
2833 iemNativeCondStartIfBlock(pReNative, off);
2834
2835 return off;
2836}
2837
2838
2839#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2840 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2841 do {
2842
2843#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2844 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2845 do {
2846
2847/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2848 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2849DECL_INLINE_THROW(uint32_t)
2850iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2851 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2852{
2853 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2854 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2855
2856 /* We need an if-block label for the inverted variant, so a set lone bit can jump straight into the if-block. */
2857 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2858 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2859
2860 /* Get the eflags. */
2861 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2862 kIemNativeGstRegUse_ReadOnly);
2863
2864 /* Translate the flag masks to bit numbers. */
2865 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2866 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2867
2868 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2869 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2870 Assert(iBitNo1 != iBitNo);
2871
2872 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2873 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2874 Assert(iBitNo2 != iBitNo);
2875 Assert(iBitNo2 != iBitNo1);
2876
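 /* Same isolate/shift/XOR trick as above for the bit pair; the lone bit is checked
    first: when set it sends the non-inverted variant straight to the else-block and
    the inverted variant straight to the if-block (via idxLabelIf). */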
2877#ifdef RT_ARCH_AMD64
2878 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2879#elif defined(RT_ARCH_ARM64)
2880 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2881#endif
2882
2883 /* Check for the lone bit first. */
2884 if (!fInverted)
2885 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2886 else
2887 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2888
2889 /* Then extract and compare the other two bits. */
2890#ifdef RT_ARCH_AMD64
2891 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2892 if (iBitNo1 > iBitNo2)
2893 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2894 else
2895 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2896 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2897
2898#elif defined(RT_ARCH_ARM64)
2899 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2900
2901 /* and tmpreg, eflreg, #1<<iBitNo1 */
2902 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2903
2904 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2905 if (iBitNo1 > iBitNo2)
2906 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2907 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2908 else
2909 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2910 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2911
2912 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2913
2914#else
2915# error "Port me"
2916#endif
2917
2918 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2919 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2920 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2921
2922 /* Free but don't flush the EFlags and tmp registers. */
2923 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2924 iemNativeRegFreeTmp(pReNative, idxEflReg);
2925
2926 /* Make a copy of the core state now as we start the if-block. */
2927 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2928
2929 return off;
2930}
2931
2932
2933#define IEM_MC_IF_CX_IS_NZ() \
2934 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2935 do {
2936
2937/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2938DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2939{
2940 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2941
2942 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2943 kIemNativeGstRegUse_ReadOnly);
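 /* Only the low 16 bits (CX) matter here, hence the UINT16_MAX mask in the test below. */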
2944 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2945 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2946
2947 iemNativeCondStartIfBlock(pReNative, off);
2948 return off;
2949}
2950
2951
2952#define IEM_MC_IF_ECX_IS_NZ() \
2953 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2954 do {
2955
2956#define IEM_MC_IF_RCX_IS_NZ() \
2957 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2958 do {
2959
2960/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2961DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2962{
2963 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2964
2965 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2966 kIemNativeGstRegUse_ReadOnly);
2967 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2968 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2969
2970 iemNativeCondStartIfBlock(pReNative, off);
2971 return off;
2972}
2973
2974
2975#define IEM_MC_IF_CX_IS_NOT_ONE() \
2976 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2977 do {
2978
2979/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2980DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2981{
2982 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2983
2984 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2985 kIemNativeGstRegUse_ReadOnly);
2986#ifdef RT_ARCH_AMD64
2987 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2988#else
2989 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2990 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2991 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2992#endif
2993 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2994
2995 iemNativeCondStartIfBlock(pReNative, off);
2996 return off;
2997}
2998
2999
3000#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3001 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3002 do {
3003
3004#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3005 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3006 do {
3007
3008/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3009DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3010{
3011 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3012
3013 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3014 kIemNativeGstRegUse_ReadOnly);
3015 if (f64Bit)
3016 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3017 else
3018 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3019 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3020
3021 iemNativeCondStartIfBlock(pReNative, off);
3022 return off;
3023}
3024
3025
3026#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3027 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3028 do {
3029
3030#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3031 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3032 do {
3033
3034/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3035 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3036DECL_INLINE_THROW(uint32_t)
3037iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3038{
3039 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3040 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3041
3042 /* We have to load both RCX and EFLAGS before we can start branching,
3043 otherwise we'll end up in the else-block with an inconsistent
3044 register allocator state.
3045 Doing EFLAGS first as it's more likely to be loaded, right? */
3046 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3047 kIemNativeGstRegUse_ReadOnly);
3048 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3049 kIemNativeGstRegUse_ReadOnly);
3050
3051 /** @todo we could reduce this to a single branch instruction by spending a
3052 * temporary register and some setnz stuff. Not sure if loops are
3053 * worth it. */
3054 /* Check CX. */
3055#ifdef RT_ARCH_AMD64
3056 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3057#else
3058 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3059 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3060 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3061#endif
3062
3063 /* Check the EFlags bit. */
3064 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3065 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3066 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3067 !fCheckIfSet /*fJmpIfSet*/);
3068
3069 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3070 iemNativeRegFreeTmp(pReNative, idxEflReg);
3071
3072 iemNativeCondStartIfBlock(pReNative, off);
3073 return off;
3074}
3075
3076
3077#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3078 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3079 do {
3080
3081#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3082 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3083 do {
3084
3085#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3086 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3087 do {
3088
3089#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3090 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3091 do {
3092
3093/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3094 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3095 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3096 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3097DECL_INLINE_THROW(uint32_t)
3098iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3099 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3100{
3101 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3102 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3103
3104 /* We have to load both RCX and EFLAGS before we can start branching,
3105 otherwise we'll end up in the else-block with an inconsistent
3106 register allocator state.
3107 Doing EFLAGS first as it's more likely to be loaded, right? */
3108 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3109 kIemNativeGstRegUse_ReadOnly);
3110 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3111 kIemNativeGstRegUse_ReadOnly);
3112
3113 /** @todo we could reduce this to a single branch instruction by spending a
3114 * temporary register and some setnz stuff. Not sure if loops are
3115 * worth it. */
3116 /* Check RCX/ECX. */
3117 if (f64Bit)
3118 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3119 else
3120 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3121
3122 /* Check the EFlags bit. */
3123 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3124 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3125 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3126 !fCheckIfSet /*fJmpIfSet*/);
3127
3128 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3129 iemNativeRegFreeTmp(pReNative, idxEflReg);
3130
3131 iemNativeCondStartIfBlock(pReNative, off);
3132 return off;
3133}
3134
3135
3136#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3137 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3138 do {
3139
3140/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3141DECL_INLINE_THROW(uint32_t)
3142iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3143{
3144 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3145
3146 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3147 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3148 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3149 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3150
3151 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3152
3153 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3154
3155 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3156
3157 iemNativeCondStartIfBlock(pReNative, off);
3158 return off;
3159}
3160
3161
3162#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3163 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3164 do {
3165
3166/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3167DECL_INLINE_THROW(uint32_t)
3168iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3169{
3170 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3171 Assert(iGReg < 16);
3172
3173 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3174 kIemNativeGstRegUse_ReadOnly);
3175
3176 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3177
3178 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3179
3180 iemNativeCondStartIfBlock(pReNative, off);
3181 return off;
3182}
3183
3184
3185
3186/*********************************************************************************************************************************
3187* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3188*********************************************************************************************************************************/
3189
3190#define IEM_MC_NOREF(a_Name) \
3191 RT_NOREF_PV(a_Name)
3192
3193#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3194 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3195
3196#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3197 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3198
3199#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3200 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3201
3202#define IEM_MC_LOCAL(a_Type, a_Name) \
3203 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3204
3205#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3206 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3207
3208#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3209 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3210
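/* Note that the a_Name these macros introduce holds a (packed) variable index into
   the recompiler state, not the value itself.  Illustrative expansion (example name
   only): IEM_MC_LOCAL(uint32_t, u32Tmp) becomes
   'uint8_t const u32Tmp = iemNativeVarAlloc(pReNative, sizeof(uint32_t))'. */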
3211
3212/**
3213 * Sets the host register for @a idxVar to @a idxReg.
3214 *
3215 * The register must not be allocated. Any guest register shadowing will be
3216 * implicitly dropped by this call.
3217 *
3218 * The variable must not have any register associated with it (causes
3219 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3220 * implied.
3221 *
3222 * @returns idxReg
3223 * @param pReNative The recompiler state.
3224 * @param idxVar The variable.
3225 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3226 * @param off For recording in debug info.
3227 *
3228 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3229 */
3230DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3231{
3232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3233 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3234 Assert(!pVar->fRegAcquired);
3235 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3236 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3237 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3238
3239 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3240 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3241
3242 iemNativeVarSetKindToStack(pReNative, idxVar);
3243 pVar->idxReg = idxReg;
3244
3245 return idxReg;
3246}
3247
3248
3249/**
3250 * A convenient helper function.
3251 */
3252DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3253 uint8_t idxReg, uint32_t *poff)
3254{
3255 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3256 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3257 return idxReg;
3258}
3259
3260
3261/**
3262 * This is called by IEM_MC_END() to clean up all variables.
3263 */
3264DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3265{
3266 uint32_t const bmVars = pReNative->Core.bmVars;
3267 if (bmVars != 0)
3268 iemNativeVarFreeAllSlow(pReNative, bmVars);
3269 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3270 Assert(pReNative->Core.bmStack == 0);
3271}
3272
3273
3274#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3275
3276/**
3277 * This is called by IEM_MC_FREE_LOCAL.
3278 */
3279DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3280{
3281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3282 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3283 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3284}
3285
3286
3287#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3288
3289/**
3290 * This is called by IEM_MC_FREE_ARG.
3291 */
3292DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3293{
3294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3295 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3296 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3297}
3298
3299
3300#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3301
3302/**
3303 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3304 */
3305DECL_INLINE_THROW(uint32_t)
3306iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3307{
3308 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3309 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3310 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3311 Assert( pVarDst->cbVar == sizeof(uint16_t)
3312 || pVarDst->cbVar == sizeof(uint32_t));
3313
3314 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3315 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3316 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3317 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3318 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3319
3320 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3321
3322 /*
3323 * Special case for immediates.
3324 */
3325 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3326 {
3327 switch (pVarDst->cbVar)
3328 {
3329 case sizeof(uint16_t):
3330 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3331 break;
3332 case sizeof(uint32_t):
3333 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3334 break;
3335 default: AssertFailed(); break;
3336 }
3337 }
3338 else
3339 {
3340 /*
3341 * The generic solution for now.
3342 */
3343 /** @todo optimize this by having the python script make sure the source
3344 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3345 * statement. Then we could just transfer the register assignments. */
3346 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3347 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3348 switch (pVarDst->cbVar)
3349 {
3350 case sizeof(uint16_t):
3351 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3352 break;
3353 case sizeof(uint32_t):
3354 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3355 break;
3356 default: AssertFailed(); break;
3357 }
3358 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3359 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3360 }
3361 return off;
3362}
3363
3364
3365
3366/*********************************************************************************************************************************
3367* Emitters for IEM_MC_CALL_CIMPL_XXX *
3368*********************************************************************************************************************************/
3369
3370/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3371DECL_INLINE_THROW(uint32_t)
3372iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3373 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3374
3375{
3376 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3377
3378#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3379 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3380 when a call clobbers any of the relevant control registers. */
3381# if 1
3382 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3383 {
3384 /* Likely as long as call+ret are done via cimpl. */
3385 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3386 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3387 }
3388 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3389 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3390 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3391 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3392 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3393 else
3394 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3395 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3396 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3397
3398# else
3399 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3400 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3401 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3402 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3403 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3404 || pfnCImpl == (uintptr_t)iemCImpl_callf
3405 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3406 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3407 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3408 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3409 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3410# endif
3411#endif
3412
3413 /*
3414 * Do all the call setup and cleanup.
3415 */
3416 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3417
3418 /*
3419 * Load the two or three hidden arguments.
3420 */
3421#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3422 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3423 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3424 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3425#else
3426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3427 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3428#endif
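 /* Note: in the VBOXSTRICTRC_STRICT_ENABLED Windows/AMD64 configuration the strict
    status code is returned through a hidden by-reference argument, which is why the
    shadow stack slot is passed as the first argument above and reloaded into xAX
    right after the call below. */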
3429
3430 /*
3431 * Make the call and check the return code.
3432 *
3433 * Shadow PC copies are always flushed here, other stuff depends on flags.
3434 * Segment and general purpose registers are explicitly flushed via the
3435 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3436 * macros.
3437 */
3438 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3439#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3440 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3441#endif
3442 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3443 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3444 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3445 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3446
3447 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3448}
3449
3450
3451#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3452 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3453
3454/** Emits code for IEM_MC_CALL_CIMPL_1. */
3455DECL_INLINE_THROW(uint32_t)
3456iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3457 uintptr_t pfnCImpl, uint8_t idxArg0)
3458{
3459 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3460 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3461}
3462
3463
3464#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3465 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3466
3467/** Emits code for IEM_MC_CALL_CIMPL_2. */
3468DECL_INLINE_THROW(uint32_t)
3469iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3470 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3471{
3472 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3473 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3474 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3475}
3476
3477
3478#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3479 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3480 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3481
3482/** Emits code for IEM_MC_CALL_CIMPL_3. */
3483DECL_INLINE_THROW(uint32_t)
3484iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3485 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3486{
3487 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3488 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3489 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3490 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3491}
3492
3493
3494#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3495 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3496 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3497
3498/** Emits code for IEM_MC_CALL_CIMPL_4. */
3499DECL_INLINE_THROW(uint32_t)
3500iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3501 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3502{
3503 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3504 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3505 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3506 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3507 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3508}
3509
3510
3511#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3512 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3513 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3514
3515 /** Emits code for IEM_MC_CALL_CIMPL_5. */
3516DECL_INLINE_THROW(uint32_t)
3517iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3518 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3519{
3520 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3521 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3522 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3523 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3524 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3525 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3526}
3527
3528
3529/** Recompiler debugging: Flush guest register shadow copies. */
3530#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3531
3532
3533
3534/*********************************************************************************************************************************
3535* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3536*********************************************************************************************************************************/
3537
3538/**
3539 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3540 */
3541DECL_INLINE_THROW(uint32_t)
3542iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3543 uintptr_t pfnAImpl, uint8_t cArgs)
3544{
3545 if (idxVarRc != UINT8_MAX)
3546 {
3547 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3548 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3549 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3550 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3551 }
3552
3553 /*
3554 * Do all the call setup and cleanup.
3555 *
3556 * It is only required to flush pending guest register writes in call volatile registers as
3557 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
3558 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
3559 * no matter the fFlushPendingWrites parameter.
3560 */
3561 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3562
3563 /*
3564 * Make the call and update the return code variable if we've got one.
3565 */
3566 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3567 if (idxVarRc != UINT8_MAX)
3568 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3569
3570 return off;
3571}
3572
3573
3574
3575#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3576 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3577
3578#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3579 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3580
3581/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3582DECL_INLINE_THROW(uint32_t)
3583iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3584{
3585 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3586}
3587
3588
3589#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3590 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3591
3592#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3593 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3594
3595/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3596DECL_INLINE_THROW(uint32_t)
3597iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3598{
3599 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3600 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3601}
3602
3603
3604#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3605 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3606
3607#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3608 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3609
3610/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3611DECL_INLINE_THROW(uint32_t)
3612iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3613 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3614{
3615 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3616 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3617 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3618}
3619
3620
3621#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3622 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3623
3624#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3625 IEM_MC_LOCAL(a_rcType, a_rc); \
3626 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3627
3628/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3629DECL_INLINE_THROW(uint32_t)
3630iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3631 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3632{
3633 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3634 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3635 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3636 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3637}
3638
3639
3640#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3641 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3642
3643#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3644 IEM_MC_LOCAL(a_rcType, a_rc); \
3645 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3646
3647/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3648DECL_INLINE_THROW(uint32_t)
3649iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3650 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3651{
3652 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3653 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3654 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3655 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3656 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3657}
3658
3659
3660
3661/*********************************************************************************************************************************
3662* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3663*********************************************************************************************************************************/
3664
3665#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3666 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3667
3668#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3669 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3670
3671#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3672 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3673
3674#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3675 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3676
3677
3678/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3679 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3680DECL_INLINE_THROW(uint32_t)
3681iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3682{
3683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3684 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3685 Assert(iGRegEx < 20);
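    /* Note: iGRegEx values 16..19 select the high byte (AH/CH/DH/BH) of the first
       four GPRs; the '& 15' below maps the index back to the underlying register. */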
3686
3687 /* Same discussion as in iemNativeEmitFetchGregU16 */
3688 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3689 kIemNativeGstRegUse_ReadOnly);
3690
3691 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3692 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3693
3694 /* The value is zero-extended to the full 64-bit host register width. */
3695 if (iGRegEx < 16)
3696 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3697 else
3698 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3699
3700 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3701 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3702 return off;
3703}
3704
3705
3706#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3707 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3708
3709#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3710 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3711
3712#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3713 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3714
3715/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3716DECL_INLINE_THROW(uint32_t)
3717iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3718{
3719 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3720 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3721 Assert(iGRegEx < 20);
3722
3723 /* Same discussion as in iemNativeEmitFetchGregU16 */
3724 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3725 kIemNativeGstRegUse_ReadOnly);
3726
3727 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3728 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3729
3730 if (iGRegEx < 16)
3731 {
3732 switch (cbSignExtended)
3733 {
3734 case sizeof(uint16_t):
3735 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3736 break;
3737 case sizeof(uint32_t):
3738 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3739 break;
3740 case sizeof(uint64_t):
3741 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3742 break;
3743 default: AssertFailed(); break;
3744 }
3745 }
3746 else
3747 {
3748 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3749 switch (cbSignExtended)
3750 {
3751 case sizeof(uint16_t):
3752 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3753 break;
3754 case sizeof(uint32_t):
3755 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3756 break;
3757 case sizeof(uint64_t):
3758 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3759 break;
3760 default: AssertFailed(); break;
3761 }
3762 }
3763
3764 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3765 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3766 return off;
3767}
3768
3769
3770
3771#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3772 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3773
3774#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3775 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3776
3777#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3778 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3779
3780/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3781DECL_INLINE_THROW(uint32_t)
3782iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3783{
3784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3786 Assert(iGReg < 16);
3787
3788 /*
3789 * We can either just load the low 16-bit of the GPR into a host register
3790 * for the variable, or we can do so via a shadow copy host register. The
3791 * latter will avoid having to reload it if it's being stored later, but
3792 * will waste a host register if it isn't touched again. Since we don't
3793 * know what's going to happen, we choose the latter for now.
3794 */
3795 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3796 kIemNativeGstRegUse_ReadOnly);
3797
3798 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3800 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3801 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3802
3803 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3804 return off;
3805}
3806
3807
3808#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3809 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3810
3811#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3812 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3813
3814/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3815DECL_INLINE_THROW(uint32_t)
3816iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3817{
3818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3820 Assert(iGReg < 16);
3821
3822 /*
3823 * We can either just load the low 16-bit of the GPR into a host register
3824 * for the variable, or we can do so via a shadow copy host register. The
3825 * latter will avoid having to reload it if it's being stored later, but
3826 * will waste a host register if it isn't touched again. Since we don't
3827 * know what's going to happen, we choose the latter for now.
3828 */
3829 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3830 kIemNativeGstRegUse_ReadOnly);
3831
3832 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3833 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3834 if (cbSignExtended == sizeof(uint32_t))
3835 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3836 else
3837 {
3838 Assert(cbSignExtended == sizeof(uint64_t));
3839 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3840 }
3841 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3842
3843 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3844 return off;
3845}
3846
3847
3848#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3849 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3850
3851#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3852 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3853
3854/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3855DECL_INLINE_THROW(uint32_t)
3856iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3857{
3858 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3859 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3860 Assert(iGReg < 16);
3861
3862 /*
3863 * We can either just load the low 32 bits of the GPR into a host register
3864 * for the variable, or we can do so via a shadow copy host register. The
3865 * latter will avoid having to reload it if it's being stored later, but
3866 * will waste a host register if it isn't touched again. Since we don't
3867 * know what's going to happen, we choose the latter for now.
3868 */
3869 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3870 kIemNativeGstRegUse_ReadOnly);
3871
3872 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3873 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3874 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3875 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3876
3877 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3878 return off;
3879}
3880
3881
3882#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3883 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3884
3885/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3886DECL_INLINE_THROW(uint32_t)
3887iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3888{
3889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3891 Assert(iGReg < 16);
3892
3893 /*
3894 * We can either just load the low 32-bit of the GPR into a host register
3895 * for the variable, or we can do so via a shadow copy host register. The
3896 * latter will avoid having to reload it if it's being stored later, but
3897 * will waste a host register if it isn't touched again. Since we don't
3898 * know what's going to happen, we choose the latter for now.
3899 */
3900 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3901 kIemNativeGstRegUse_ReadOnly);
3902
3903 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3904 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3905 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3906 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3907
3908 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3909 return off;
3910}
3911
3912
3913#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3914 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3915
3916#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3917 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3918
3919/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3920 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3921DECL_INLINE_THROW(uint32_t)
3922iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3923{
3924 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3925 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3926 Assert(iGReg < 16);
3927
3928 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3929 kIemNativeGstRegUse_ReadOnly);
3930
3931 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3932 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3933 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3934 /** @todo name the register a shadow one already? */
3935 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3936
3937 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3938 return off;
3939}
3940
3941
3942#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3943#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3944 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3945
3946/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3947DECL_INLINE_THROW(uint32_t)
3948iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3949{
3950 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3951 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3952 Assert(iGRegLo < 16 && iGRegHi < 16);
3953
3954 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3955 kIemNativeGstRegUse_ReadOnly);
3956 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3957 kIemNativeGstRegUse_ReadOnly);
3958
3959 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3960 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
3961 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3962 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3963
3964 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3965 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3966 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3967 return off;
3968}
3969#endif
3970
3971
3972/*********************************************************************************************************************************
3973* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3974*********************************************************************************************************************************/
3975
3976#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3977 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3978
3979/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3980DECL_INLINE_THROW(uint32_t)
3981iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3982{
3983 Assert(iGRegEx < 20);
3984 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3985 kIemNativeGstRegUse_ForUpdate);
3986#ifdef RT_ARCH_AMD64
3987 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3988
3989 /* To the lowest byte of the register: mov r8, imm8 */
3990 if (iGRegEx < 16)
3991 {
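        /* Note: a bare REX prefix is needed for host registers 4..7 so the encoding
           selects spl/bpl/sil/dil rather than the legacy ah/ch/dh/bh forms. */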
3992 if (idxGstTmpReg >= 8)
3993 pbCodeBuf[off++] = X86_OP_REX_B;
3994 else if (idxGstTmpReg >= 4)
3995 pbCodeBuf[off++] = X86_OP_REX;
3996 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3997 pbCodeBuf[off++] = u8Value;
3998 }
3999 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
4000 else if (idxGstTmpReg < 4)
4001 {
4002 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4003 pbCodeBuf[off++] = u8Value;
4004 }
4005 else
4006 {
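        /* The ah/ch/dh/bh encodings only exist for the first four legacy registers and
           cannot be combined with a REX prefix, so for other host registers we rotate
           the guest bits 15:8 down into the low byte, store, and rotate them back. */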
4007 /* ror reg64, 8 */
4008 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4009 pbCodeBuf[off++] = 0xc1;
4010 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4011 pbCodeBuf[off++] = 8;
4012
4013 /* mov reg8, imm8 */
4014 if (idxGstTmpReg >= 8)
4015 pbCodeBuf[off++] = X86_OP_REX_B;
4016 else if (idxGstTmpReg >= 4)
4017 pbCodeBuf[off++] = X86_OP_REX;
4018 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4019 pbCodeBuf[off++] = u8Value;
4020
4021 /* rol reg64, 8 */
4022 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4023 pbCodeBuf[off++] = 0xc1;
4024 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4025 pbCodeBuf[off++] = 8;
4026 }
4027
4028#elif defined(RT_ARCH_ARM64)
4029 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4030 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4031 if (iGRegEx < 16)
4032 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4033 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4034 else
4035 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4036 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4037 iemNativeRegFreeTmp(pReNative, idxImmReg);
4038
4039#else
4040# error "Port me!"
4041#endif
4042
4043 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4044
4045#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4046 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4047#endif
4048
4049 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4050 return off;
4051}
4052
4053
4054#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4055 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4056
4057/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4058DECL_INLINE_THROW(uint32_t)
4059iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4060{
4061 Assert(iGRegEx < 20);
4062 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4063
4064 /*
4065 * If it's a constant value (unlikely) we treat this as a
4066 * IEM_MC_STORE_GREG_U8_CONST statement.
4067 */
4068 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4069 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4070 { /* likely */ }
4071 else
4072 {
4073 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4074 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4075 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4076 }
4077
4078 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4079 kIemNativeGstRegUse_ForUpdate);
4080 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4081
4082#ifdef RT_ARCH_AMD64
4083 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4084 if (iGRegEx < 16)
4085 {
4086 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4087 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4088 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4089 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4090 pbCodeBuf[off++] = X86_OP_REX;
4091 pbCodeBuf[off++] = 0x8a;
4092 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4093 }
4094 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4095 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4096 {
4097 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4098 pbCodeBuf[off++] = 0x8a;
4099 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4100 }
4101 else
4102 {
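        /* Same rotation trick as in iemNativeEmitStoreGregU8Const: bring guest bits 15:8
           down into the low byte, copy the byte over, then rotate them back into place. */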
4103 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4104
4105 /* ror reg64, 8 */
4106 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4107 pbCodeBuf[off++] = 0xc1;
4108 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4109 pbCodeBuf[off++] = 8;
4110
4111 /* mov reg8, reg8(r/m) */
4112 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4113 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4114 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4115 pbCodeBuf[off++] = X86_OP_REX;
4116 pbCodeBuf[off++] = 0x8a;
4117 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4118
4119 /* rol reg64, 8 */
4120 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4121 pbCodeBuf[off++] = 0xc1;
4122 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4123 pbCodeBuf[off++] = 8;
4124 }
4125
4126#elif defined(RT_ARCH_ARM64)
4127 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4128 or
4129 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4130 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4131 if (iGRegEx < 16)
4132 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4133 else
4134 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4135
4136#else
4137# error "Port me!"
4138#endif
4139 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4140
4141 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4142
4143#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4144 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4145#endif
4146 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4147 return off;
4148}
4149
4150
4151
4152#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4153 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4154
4155/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4156DECL_INLINE_THROW(uint32_t)
4157iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4158{
4159 Assert(iGReg < 16);
4160 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4161 kIemNativeGstRegUse_ForUpdate);
4162#ifdef RT_ARCH_AMD64
4163 /* mov reg16, imm16 */
4164 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4165 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4166 if (idxGstTmpReg >= 8)
4167 pbCodeBuf[off++] = X86_OP_REX_B;
4168 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4169 pbCodeBuf[off++] = RT_BYTE1(uValue);
4170 pbCodeBuf[off++] = RT_BYTE2(uValue);
4171
4172#elif defined(RT_ARCH_ARM64)
4173 /* movk xdst, #uValue, lsl #0 */
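    /* movk only rewrites bits 15:0 of the destination and leaves bits 63:16 untouched,
       which matches the 16-bit GPR store semantics needed here. */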
4174 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4175 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4176
4177#else
4178# error "Port me!"
4179#endif
4180
4181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4182
4183#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4184 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4185#endif
4186 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4187 return off;
4188}
4189
4190
4191#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4192 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4193
4194/** Emits code for IEM_MC_STORE_GREG_U16. */
4195DECL_INLINE_THROW(uint32_t)
4196iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4197{
4198 Assert(iGReg < 16);
4199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4200
4201 /*
4202 * If it's a constant value (unlikely) we treat this as a
4203 * IEM_MC_STORE_GREG_U16_CONST statement.
4204 */
4205 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4206 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4207 { /* likely */ }
4208 else
4209 {
4210 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4211 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4212 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4213 }
4214
4215 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4216 kIemNativeGstRegUse_ForUpdate);
4217
4218#ifdef RT_ARCH_AMD64
4219 /* mov reg16, reg16 or [mem16] */
4220 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4221 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4222 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4223 {
4224 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4225 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4226 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4227 pbCodeBuf[off++] = 0x8b;
4228 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4229 }
4230 else
4231 {
4232 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4233 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4234 if (idxGstTmpReg >= 8)
4235 pbCodeBuf[off++] = X86_OP_REX_R;
4236 pbCodeBuf[off++] = 0x8b;
4237 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4238 }
4239
4240#elif defined(RT_ARCH_ARM64)
4241 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4242 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4243 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4244 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4245 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4246
4247#else
4248# error "Port me!"
4249#endif
4250
4251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4252
4253#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4254 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4255#endif
4256 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4257 return off;
4258}
4259
4260
4261#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4262 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4263
4264/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4265DECL_INLINE_THROW(uint32_t)
4266iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4267{
4268 Assert(iGReg < 16);
4269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4270 kIemNativeGstRegUse_ForFullWrite);
4271 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4272#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4273 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4274#endif
4275 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4276 return off;
4277}
4278
4279
4280#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4281 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4282
4283#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
4284 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
4285
4286/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
4287DECL_INLINE_THROW(uint32_t)
4288iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4289{
4290 Assert(iGReg < 16);
4291 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4292
4293 /*
4294 * If it's a constant value (unlikely) we treat this as a
4295 * IEM_MC_STORE_GREG_U32_CONST statement.
4296 */
4297 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4298 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4299 { /* likely */ }
4300 else
4301 {
4302 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4303 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4304 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4305 }
4306
4307 /*
4308 * For the rest we allocate a guest register for the variable and write
4309 * it to the CPUMCTX structure.
4310 */
4311 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4312#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4313 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4314#else
4315 RT_NOREF(idxVarReg);
4316#endif
4317#ifdef VBOX_STRICT
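    /* Strict builds: per the emitter name, this sanity-checks that bits 63:32 of the
       value are already clear, since the full host register becomes the new guest
       register shadow. */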
4318 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4319#endif
4320 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4321 return off;
4322}
4323
4324
4325#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4326 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4327
4328/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4329DECL_INLINE_THROW(uint32_t)
4330iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4331{
4332 Assert(iGReg < 16);
4333 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4334 kIemNativeGstRegUse_ForFullWrite);
4335 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4336#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4337 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4338#endif
4339 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4340 return off;
4341}
4342
4343
4344#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4345 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4346
4347#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4348 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4349
4350/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
4351DECL_INLINE_THROW(uint32_t)
4352iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4353{
4354 Assert(iGReg < 16);
4355 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4356
4357 /*
4358 * If it's a constant value (unlikely) we treat this as a
4359 * IEM_MC_STORE_GREG_U64_CONST statement.
4360 */
4361 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4362 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4363 { /* likely */ }
4364 else
4365 {
4366 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4367 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4368 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4369 }
4370
4371 /*
4372 * For the rest we allocate a guest register for the variable and write
4373 * it to the CPUMCTX structure.
4374 */
4375 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4376#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4377 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4378#else
4379 RT_NOREF(idxVarReg);
4380#endif
4381 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4382 return off;
4383}
4384
4385
4386#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4387 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4388
4389/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4390DECL_INLINE_THROW(uint32_t)
4391iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4392{
4393 Assert(iGReg < 16);
4394 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4395 kIemNativeGstRegUse_ForUpdate);
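    /* A 32-bit register-to-register move zero-extends on both AMD64 and ARM64 hosts,
       which clears bits 63:32 as required. */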
4396 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4397#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4398 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4399#endif
4400 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4401 return off;
4402}
4403
4404
4405#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4406#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4407 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4408
4409/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4410DECL_INLINE_THROW(uint32_t)
4411iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4412{
4413 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4414 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4415 Assert(iGRegLo < 16 && iGRegHi < 16);
4416
4417 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4418 kIemNativeGstRegUse_ForFullWrite);
4419 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4420 kIemNativeGstRegUse_ForFullWrite);
4421
4422 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4423 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4424 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4425 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4426
4427 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4428 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4429 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4430 return off;
4431}
4432#endif
4433
4434
4435/*********************************************************************************************************************************
4436* General purpose register manipulation (add, sub). *
4437*********************************************************************************************************************************/
4438
4439#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
4440 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
4441
4442/** Emits code for IEM_MC_ADD_GREG_U16. */
4443DECL_INLINE_THROW(uint32_t)
4444iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4445{
4446 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4447 kIemNativeGstRegUse_ForUpdate);
4448
4449#ifdef RT_ARCH_AMD64
4450 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4451 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4452 if (idxGstTmpReg >= 8)
4453 pbCodeBuf[off++] = X86_OP_REX_B;
4454 if (uAddend == 1)
4455 {
4456 pbCodeBuf[off++] = 0xff; /* inc */
4457 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4458 }
4459 else
4460 {
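        /* add r16, imm16 - the operand size prefix makes 0x81 /0 take a 16-bit
           immediate, hence the trailing zero byte below. */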
4461 pbCodeBuf[off++] = 0x81;
4462 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4463 pbCodeBuf[off++] = uAddend;
4464 pbCodeBuf[off++] = 0;
4465 }
4466
4467#else
4468 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4469 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4470
4471 /* add tmp, gstgrp, uAddend */
4472 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4473
4474 /* bfi w<gst>, w<tmp>, 0, 16 - copies bits 15:0 from idxTmpReg into idxGstTmpReg, leaving bits 63:16 untouched. */
4475 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4476
4477 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4478#endif
4479
4480 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4481
4482#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4483 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4484#endif
4485
4486 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4487 return off;
4488}
4489
4490
4491#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4492 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4493
4494#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4495 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4496
4497/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4498DECL_INLINE_THROW(uint32_t)
4499iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4500{
4501 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4502 kIemNativeGstRegUse_ForUpdate);
4503
4504#ifdef RT_ARCH_AMD64
4505 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4506 if (f64Bit)
4507 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4508 else if (idxGstTmpReg >= 8)
4509 pbCodeBuf[off++] = X86_OP_REX_B;
4510 if (uAddend == 1)
4511 {
4512 pbCodeBuf[off++] = 0xff; /* inc */
4513 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4514 }
4515 else if (uAddend < 128)
4516 {
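        /* 0x83 /0 sign-extends its 8-bit immediate, so it is only usable while
           uAddend is below 0x80. */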
4517 pbCodeBuf[off++] = 0x83; /* add */
4518 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4519 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4520 }
4521 else
4522 {
4523 pbCodeBuf[off++] = 0x81; /* add */
4524 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4525 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4526 pbCodeBuf[off++] = 0;
4527 pbCodeBuf[off++] = 0;
4528 pbCodeBuf[off++] = 0;
4529 }
4530
4531#else
4532 /* add gstgrp, gstgrp, uAddend */
4533 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4534 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4535
4536#endif
4537
4538 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4539
4540#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4541 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4542#endif
4543
4544 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4545 return off;
4546}
4547
4548
4549
4550#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4551 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4552
4553/** Emits code for IEM_MC_SUB_GREG_U16. */
4554DECL_INLINE_THROW(uint32_t)
4555iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4556{
4557 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4558 kIemNativeGstRegUse_ForUpdate);
4559
4560#ifdef RT_ARCH_AMD64
4561 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4562 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4563 if (idxGstTmpReg >= 8)
4564 pbCodeBuf[off++] = X86_OP_REX_B;
4565 if (uSubtrahend == 1)
4566 {
4567 pbCodeBuf[off++] = 0xff; /* dec */
4568 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4569 }
4570 else
4571 {
4572 pbCodeBuf[off++] = 0x81;
4573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4574 pbCodeBuf[off++] = uSubtrahend;
4575 pbCodeBuf[off++] = 0;
4576 }
4577
4578#else
4579 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4580 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4581
4582 /* sub tmp, gstgrp, uSubtrahend */
4583 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4584
4585 /* bfi w<gst>, w<tmp>, 0, 16 - copies bits 15:0 from idxTmpReg into idxGstTmpReg, leaving bits 63:16 untouched. */
4586 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4587
4588 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4589#endif
4590
4591 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4592
4593#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4594 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4595#endif
4596
4597 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4598 return off;
4599}
4600
4601
4602#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4603 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4604
4605#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4606 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4607
4608/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4609DECL_INLINE_THROW(uint32_t)
4610iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4611{
4612 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4613 kIemNativeGstRegUse_ForUpdate);
4614
4615#ifdef RT_ARCH_AMD64
4616 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4617 if (f64Bit)
4618 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4619 else if (idxGstTmpReg >= 8)
4620 pbCodeBuf[off++] = X86_OP_REX_B;
4621 if (uSubtrahend == 1)
4622 {
4623 pbCodeBuf[off++] = 0xff; /* dec */
4624 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4625 }
4626 else if (uSubtrahend < 128)
4627 {
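        /* 0x83 /5 sign-extends its 8-bit immediate, so it is only usable while
           uSubtrahend is below 0x80. */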
4628 pbCodeBuf[off++] = 0x83; /* sub */
4629 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4630 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4631 }
4632 else
4633 {
4634 pbCodeBuf[off++] = 0x81; /* sub */
4635 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4636 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4637 pbCodeBuf[off++] = 0;
4638 pbCodeBuf[off++] = 0;
4639 pbCodeBuf[off++] = 0;
4640 }
4641
4642#else
4643 /* sub tmp, gstgrp, uSubtrahend */
4644 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4645 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4646
4647#endif
4648
4649 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4650
4651#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4652 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4653#endif
4654
4655 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4656 return off;
4657}
4658
4659
4660#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4661 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4662
4663#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4664 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4665
4666#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4667 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4668
4669#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4670 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4671
4672/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4673DECL_INLINE_THROW(uint32_t)
4674iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4675{
4676#ifdef VBOX_STRICT
4677 switch (cbMask)
4678 {
4679 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4680 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4681 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4682 case sizeof(uint64_t): break;
4683 default: AssertFailedBreak();
4684 }
4685#endif
4686
4687 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4688 kIemNativeGstRegUse_ForUpdate);
4689
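    /* Example: IEM_MC_AND_GREG_U16 with a 0x00ff mask yields an effective 64-bit mask
       of 0xffffffffffff00ff, so only the low word of the guest register is modified
       while bits 63:16 keep their value. */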
4690 switch (cbMask)
4691 {
4692 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4693 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4694 break;
4695 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4696 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4697 break;
4698 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4699 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4700 break;
4701 case sizeof(uint64_t):
4702 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4703 break;
4704 default: AssertFailedBreak();
4705 }
4706
4707 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4708
4709#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4710 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4711#endif
4712
4713 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4714 return off;
4715}
4716
4717
4718#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4719 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4720
4721#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4722 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4723
4724#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4725 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4726
4727#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4728 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4729
4730/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4731DECL_INLINE_THROW(uint32_t)
4732iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4733{
4734#ifdef VBOX_STRICT
4735 switch (cbMask)
4736 {
4737 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4738 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4739 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4740 case sizeof(uint64_t): break;
4741 default: AssertFailedBreak();
4742 }
4743#endif
4744
4745 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4746 kIemNativeGstRegUse_ForUpdate);
4747
4748 switch (cbMask)
4749 {
4750 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4751 case sizeof(uint16_t):
4752 case sizeof(uint64_t):
4753 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4754 break;
4755 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4756 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4757 break;
4758 default: AssertFailedBreak();
4759 }
4760
4761 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4762
4763#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4764 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4765#endif
4766
4767 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4768 return off;
4769}
4770
4771
4772/*********************************************************************************************************************************
4773* Local/Argument variable manipulation (add, sub, and, or). *
4774*********************************************************************************************************************************/
4775
4776#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4777 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4778
4779#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4780 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4781
4782#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4783 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4784
4785#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4786 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4787
4788
4789#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4790 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4791
4792#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4793 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4794
4795#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4796 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4797
4798/** Emits code for AND'ing a local and a constant value. */
4799DECL_INLINE_THROW(uint32_t)
4800iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4801{
4802#ifdef VBOX_STRICT
4803 switch (cbMask)
4804 {
4805 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4806 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4807 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4808 case sizeof(uint64_t): break;
4809 default: AssertFailedBreak();
4810 }
4811#endif
4812
4813 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4814 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4815
4816 if (cbMask <= sizeof(uint32_t))
4817 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4818 else
4819 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4820
4821 iemNativeVarRegisterRelease(pReNative, idxVar);
4822 return off;
4823}
4824
4825
4826#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4827 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4828
4829#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4830 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4831
4832#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4833 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4834
4835#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4836 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4837
4838/** Emits code for OR'ing a local and a constant value. */
4839DECL_INLINE_THROW(uint32_t)
4840iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4841{
4842#ifdef VBOX_STRICT
4843 switch (cbMask)
4844 {
4845 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4846 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4847 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4848 case sizeof(uint64_t): break;
4849 default: AssertFailedBreak();
4850 }
4851#endif
4852
4853 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4854 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4855
4856 if (cbMask <= sizeof(uint32_t))
4857 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4858 else
4859 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4860
4861 iemNativeVarRegisterRelease(pReNative, idxVar);
4862 return off;
4863}
4864
4865
4866#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4867 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4868
4869#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4870 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4871
4872#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4873 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4874
4875/** Emits code for reversing the byte order in a local value. */
4876DECL_INLINE_THROW(uint32_t)
4877iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4878{
4879 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4880 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4881
4882 switch (cbLocal)
4883 {
4884 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4885 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4886 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4887 default: AssertFailedBreak();
4888 }
4889
4890 iemNativeVarRegisterRelease(pReNative, idxVar);
4891 return off;
4892}
4893
4894
4895#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4896 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4897
4898#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4899 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4900
4901#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4902 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4903
4904/** Emits code for shifting left a local value. */
4905DECL_INLINE_THROW(uint32_t)
4906iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4907{
4908#ifdef VBOX_STRICT
4909 switch (cbLocal)
4910 {
4911 case sizeof(uint8_t): Assert(cShift < 8); break;
4912 case sizeof(uint16_t): Assert(cShift < 16); break;
4913 case sizeof(uint32_t): Assert(cShift < 32); break;
4914 case sizeof(uint64_t): Assert(cShift < 64); break;
4915 default: AssertFailedBreak();
4916 }
4917#endif
4918
4919 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4920 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4921
4922 if (cbLocal <= sizeof(uint32_t))
4923 {
4924 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4925 if (cbLocal < sizeof(uint32_t))
4926 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4927 cbLocal == sizeof(uint16_t)
4928 ? UINT32_C(0xffff)
4929 : UINT32_C(0xff));
4930 }
4931 else
4932 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4933
4934 iemNativeVarRegisterRelease(pReNative, idxVar);
4935 return off;
4936}
4937
4938
4939#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4940 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4941
4942#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4943 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4944
4945#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4946 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4947
4948/** Emits code for arithmetically shifting a local value to the right. */
4949DECL_INLINE_THROW(uint32_t)
4950iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4951{
4952#ifdef VBOX_STRICT
4953 switch (cbLocal)
4954 {
4955 case sizeof(int8_t): Assert(cShift < 8); break;
4956 case sizeof(int16_t): Assert(cShift < 16); break;
4957 case sizeof(int32_t): Assert(cShift < 32); break;
4958 case sizeof(int64_t): Assert(cShift < 64); break;
4959 default: AssertFailedBreak();
4960 }
4961#endif
4962
4963 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4964 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4965
4966 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4967 if (cbLocal == sizeof(uint8_t))
4968 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4969 else if (cbLocal == sizeof(uint16_t))
4970 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4971
4972 if (cbLocal <= sizeof(uint32_t))
4973 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4974 else
4975 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4976
4977 iemNativeVarRegisterRelease(pReNative, idxVar);
4978 return off;
4979}
4980
4981
4982#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4983 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4984
4985#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4986 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4987
4988#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4989 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4990
4991/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4992DECL_INLINE_THROW(uint32_t)
4993iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4994{
4995 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4996 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4998 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4999
5000 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5001 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5002
5003 /* Need to sign extend the value. */
5004 if (cbLocal <= sizeof(uint32_t))
5005 {
5006/** @todo ARM64: In case of boredom, the extended add instruction can do the
5007 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5008 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5009
5010 switch (cbLocal)
5011 {
5012 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5013 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5014 default: AssertFailed();
5015 }
5016
5017 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5018 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5019 }
5020 else
5021 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5022
5023 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5024 iemNativeVarRegisterRelease(pReNative, idxVar);
5025 return off;
5026}
5027
5028
5029
5030/*********************************************************************************************************************************
5031* EFLAGS *
5032*********************************************************************************************************************************/
5033
5034#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5035# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5036#else
5037# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5038 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5039
5040DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5041{
5042 if (fEflOutput)
5043 {
5044 PVMCPUCC const pVCpu = pReNative->pVCpu;
5045# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5046 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5047 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5048 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5049# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5050 if (fEflOutput & (a_fEfl)) \
5051 { \
5052 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5053 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5054 else \
5055 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5056 } else do { } while (0)
5057# else
5058 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5059 IEMLIVENESSBIT const LivenessClobbered =
5060 {
5061 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5062 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5063 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5064 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5065 };
5066 IEMLIVENESSBIT const LivenessDelayable =
5067 {
5068 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5069 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5070 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5071 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5072 };
5073# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5074 if (fEflOutput & (a_fEfl)) \
5075 { \
5076 if (LivenessClobbered.a_fLivenessMember) \
5077 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5078 else if (LivenessDelayable.a_fLivenessMember) \
5079 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5080 else \
5081 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5082 } else do { } while (0)
5083# endif
5084 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5085 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5086 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5087 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5088 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5089 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5090 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5091# undef CHECK_FLAG_AND_UPDATE_STATS
5092 }
5093 RT_NOREF(fEflInput);
5094}
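/* Added note (summary of the logic above, not from the original source): a flag
   counts as Skippable when the liveness info shows it is only ever written before
   anything can observe it (no read, no potential exception/call, no other use),
   Delayable when the only observer is a potential exception/call, and Required
   otherwise. */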
5095#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5096
5097#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5098#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5099 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5100
5101/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5102DECL_INLINE_THROW(uint32_t)
5103iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5104 uint32_t fEflInput, uint32_t fEflOutput)
5105{
5106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5107 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5108 RT_NOREF(fEflInput, fEflOutput);
5109
5110#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5111# ifdef VBOX_STRICT
5112 if ( pReNative->idxCurCall != 0
5113 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5114 {
5115 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5116 uint32_t const fBoth = fEflInput | fEflOutput;
5117# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5118 AssertMsg( !(fBoth & (a_fElfConst)) \
5119 || (!(fEflInput & (a_fElfConst)) \
5120 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5121 : !(fEflOutput & (a_fElfConst)) \
5122 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5123 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5124 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5125 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5126 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5127 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5128 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5129 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5130 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5131 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5132# undef ASSERT_ONE_EFL
5133 }
5134# endif
5135#endif
5136
5137 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5138
5139 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5140 * the existing shadow copy. */
5141 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5142 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5143 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5144 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5145 return off;
5146}
5147
5148
5149
5150/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5151 * start using it with custom native code emission (inlining assembly
5152 * instruction helpers). */
5153#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5154#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5155 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5156 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5157
5158#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5159#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5160 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5161 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5162
5163/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5164DECL_INLINE_THROW(uint32_t)
5165iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5166 bool fUpdateSkipping)
5167{
5168 RT_NOREF(fEflOutput);
5169 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5170 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5171
5172#ifdef VBOX_STRICT
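    /* Added note: strict builds sanity check the incoming value - EFLAGS bit 1
       (X86_EFL_RA1_MASK) must be set and the reserved read-as-zero bits must be
       clear; a breakpoint (0x2001 / 0x2002) is emitted if either check fails. */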
5173 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5174 uint32_t offFixup = off;
5175 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5176 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5177 iemNativeFixupFixedJump(pReNative, offFixup, off);
5178
5179 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5180 offFixup = off;
5181 off = iemNativeEmitJzToFixed(pReNative, off, off);
5182 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5183 iemNativeFixupFixedJump(pReNative, offFixup, off);
5184
5185 /** @todo validate that only bits in the fEflOutput mask changed. */
5186#endif
5187
5188#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5189 if (fUpdateSkipping)
5190 {
5191 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5192 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5193 else
5194 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5195 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5196 }
5197#else
5198 RT_NOREF_PV(fUpdateSkipping);
5199#endif
5200
5201 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5202 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5203 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5204 return off;
5205}
5206
5207
5208typedef enum IEMNATIVEMITEFLOP
5209{
5210 kIemNativeEmitEflOp_Invalid = 0,
5211 kIemNativeEmitEflOp_Set,
5212 kIemNativeEmitEflOp_Clear,
5213 kIemNativeEmitEflOp_Flip
5214} IEMNATIVEMITEFLOP;
5215
5216#define IEM_MC_SET_EFL_BIT(a_fBit) \
5217 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5218
5219#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5220 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5221
5222#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5223 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5224
5225/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5226DECL_INLINE_THROW(uint32_t)
5227iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5228{
5229 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5230 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5231
5232 switch (enmOp)
5233 {
5234 case kIemNativeEmitEflOp_Set:
5235 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5236 break;
5237 case kIemNativeEmitEflOp_Clear:
5238 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5239 break;
5240 case kIemNativeEmitEflOp_Flip:
5241 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5242 break;
5243 default:
5244 AssertFailed();
5245 break;
5246 }
5247
5248 /** @todo No delayed writeback for EFLAGS right now. */
5249 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5250
5251 /* Free but don't flush the EFLAGS register. */
5252 iemNativeRegFreeTmp(pReNative, idxEflReg);
5253
5254 return off;
5255}
5256
5257
5258/*********************************************************************************************************************************
5259* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5260*********************************************************************************************************************************/
5261
5262#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5263 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5264
5265#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5266 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5267
5268#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5269 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5270
5271
5272/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5273 * IEM_MC_FETCH_SREG_ZX_U64. */
5274DECL_INLINE_THROW(uint32_t)
5275iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5276{
5277 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5278 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5279 Assert(iSReg < X86_SREG_COUNT);
5280
5281 /*
5282 * For now, we will not create a shadow copy of a selector. The rationale
5283 * is that since we do not recompile the popping and loading of segment
5284 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
5285 * pushing and moving to registers, there is only a small chance that the
5286 * shadow copy will be accessed again before the register is reloaded. One
5287 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5288 * the extra register pressure atm.
5289 *
5290 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5291 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
5292 * store scenario covered at present (r160730).
5293 */
5294 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5295 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5296 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5297 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5298 return off;
5299}
5300
5301
5302
5303/*********************************************************************************************************************************
5304* Register references. *
5305*********************************************************************************************************************************/
5306
5307#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5308 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5309
5310#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5311 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5312
5313/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5314DECL_INLINE_THROW(uint32_t)
5315iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5316{
5317 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5318 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5319 Assert(iGRegEx < 20);
5320
5321 if (iGRegEx < 16)
5322 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5323 else
5324 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5325
5326 /* If we've delayed writing back the register value, flush it now. */
5327 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5328
5329 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5330 if (!fConst)
5331 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5332
5333 return off;
5334}
5335
5336#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5337 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5338
5339#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5340 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5341
5342#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5343 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5344
5345#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5346 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5347
5348#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5349 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5350
5351#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5352 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5353
5354#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5355 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5356
5357#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5358 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5359
5360#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5361 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5362
5363#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5364 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5365
5366/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5367DECL_INLINE_THROW(uint32_t)
5368iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5369{
5370 Assert(iGReg < 16);
5371 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5372 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5373
5374 /* If we've delayed writing back the register value, flush it now. */
5375 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5376
5377 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5378 if (!fConst)
5379 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5380
5381 return off;
5382}
5383
5384
5385#undef IEM_MC_REF_EFLAGS /* should not be used. */
5386#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5387 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5388 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5389
5390/** Handles IEM_MC_REF_EFLAGS. */
5391DECL_INLINE_THROW(uint32_t)
5392iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5393{
5394 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5395 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5396
5397#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5398 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5399
5400 /* Updating the skipping according to the outputs is a little early, but
5401 we don't have any other hooks for references atm. */
5402 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5403 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5404 else if (fEflOutput & X86_EFL_STATUS_BITS)
5405 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5406 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5407#else
5408 RT_NOREF(fEflInput, fEflOutput);
5409#endif
5410
5411 /* If we've delayed writing back the register value, flush it now. */
5412 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5413
5414 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5415 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5416
5417 return off;
5418}
5419
5420
5421/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5422 * different code from the threaded recompiler, maybe it would be helpful. For now
5423 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5424#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5425
5426
5427#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5428 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5429
5430#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5431 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5432
5433#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5434 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5435
5436#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5437 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5438
5439#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5440/* Just being paranoid here. */
5441# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5442AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5443AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5444AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5445AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5446# endif
5447AssertCompileMemberOffset(X86XMMREG, au64, 0);
5448AssertCompileMemberOffset(X86XMMREG, au32, 0);
5449AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5450AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5451
5452# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5453 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5454# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5455 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5456# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5457 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5458# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5459 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5460#endif
5461
5462/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5463DECL_INLINE_THROW(uint32_t)
5464iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5465{
5466 Assert(iXReg < 16);
5467 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5468 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5469
5470 /* If we've delayed writing back the register value, flush it now. */
5471 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5472
5473#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5474 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5475 if (!fConst)
5476 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5477#else
5478 RT_NOREF(fConst);
5479#endif
5480
5481 return off;
5482}
5483
5484
5485
5486/*********************************************************************************************************************************
5487* Effective Address Calculation *
5488*********************************************************************************************************************************/
5489#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5490 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5491
5492/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5493 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5494DECL_INLINE_THROW(uint32_t)
5495iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5496 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5497{
5498 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5499
5500 /*
5501 * Handle the disp16 form with no registers first.
5502 *
5503 * Convert to an immediate value, as that'll delay the register allocation
5504 * and assignment till the memory access / call / whatever and we can use
5505 * a more appropriate register (or none at all).
5506 */
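    /* Example (added): "mov cx, [0x1234]" with 16-bit addressing has mod=00 and
       r/m=110, so the test below matches and GCPtrEff simply becomes the
       constant 0x1234. */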
5507 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5508 {
5509 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5510 return off;
5511 }
5512
5513 /* Determine the displacement. */
5514 uint16_t u16EffAddr;
5515 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5516 {
5517 case 0: u16EffAddr = 0; break;
5518 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5519 case 2: u16EffAddr = u16Disp; break;
5520 default: AssertFailedStmt(u16EffAddr = 0);
5521 }
5522
5523 /* Determine the registers involved. */
5524 uint8_t idxGstRegBase;
5525 uint8_t idxGstRegIndex;
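    /* Added note: this is the classic 16-bit addressing table,
       r/m 0..7 = [BX+SI], [BX+DI], [BP+SI], [BP+DI], [SI], [DI], [BP]/disp16, [BX]. */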
5526 switch (bRm & X86_MODRM_RM_MASK)
5527 {
5528 case 0:
5529 idxGstRegBase = X86_GREG_xBX;
5530 idxGstRegIndex = X86_GREG_xSI;
5531 break;
5532 case 1:
5533 idxGstRegBase = X86_GREG_xBX;
5534 idxGstRegIndex = X86_GREG_xDI;
5535 break;
5536 case 2:
5537 idxGstRegBase = X86_GREG_xBP;
5538 idxGstRegIndex = X86_GREG_xSI;
5539 break;
5540 case 3:
5541 idxGstRegBase = X86_GREG_xBP;
5542 idxGstRegIndex = X86_GREG_xDI;
5543 break;
5544 case 4:
5545 idxGstRegBase = X86_GREG_xSI;
5546 idxGstRegIndex = UINT8_MAX;
5547 break;
5548 case 5:
5549 idxGstRegBase = X86_GREG_xDI;
5550 idxGstRegIndex = UINT8_MAX;
5551 break;
5552 case 6:
5553 idxGstRegBase = X86_GREG_xBP;
5554 idxGstRegIndex = UINT8_MAX;
5555 break;
5556#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5557 default:
5558#endif
5559 case 7:
5560 idxGstRegBase = X86_GREG_xBX;
5561 idxGstRegIndex = UINT8_MAX;
5562 break;
5563 }
5564
5565 /*
5566 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5567 */
5568 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5569 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5570 kIemNativeGstRegUse_ReadOnly);
5571 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5572 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5573 kIemNativeGstRegUse_ReadOnly)
5574 : UINT8_MAX;
5575#ifdef RT_ARCH_AMD64
5576 if (idxRegIndex == UINT8_MAX)
5577 {
5578 if (u16EffAddr == 0)
5579 {
5580 /* movxz ret, base */
5581 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5582 }
5583 else
5584 {
5585 /* lea ret32, [base64 + disp32] */
5586 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5587 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5588 if (idxRegRet >= 8 || idxRegBase >= 8)
5589 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5590 pbCodeBuf[off++] = 0x8d;
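            /* Added note: a base of r12 encodes as r/m=100b, which is the SIB
               escape, so it needs an explicit SIB byte with the no-index encoding. */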
5591 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5592 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5593 else
5594 {
5595 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5596 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5597 }
5598 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5599 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5600 pbCodeBuf[off++] = 0;
5601 pbCodeBuf[off++] = 0;
5602 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5603
5604 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5605 }
5606 }
5607 else
5608 {
5609 /* lea ret32, [index64 + base64 (+ disp32)] */
5610 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5611 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5612 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5613 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5614 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5615 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5616 pbCodeBuf[off++] = 0x8d;
5617 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5618 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5619 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5620 if (bMod == X86_MOD_MEM4)
5621 {
5622 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5623 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5624 pbCodeBuf[off++] = 0;
5625 pbCodeBuf[off++] = 0;
5626 }
5627 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5628 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5629 }
5630
5631#elif defined(RT_ARCH_ARM64)
5632 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5633 if (u16EffAddr == 0)
5634 {
5635 if (idxRegIndex == UINT8_MAX)
5636 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5637 else
5638 {
5639 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5640 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5641 }
5642 }
5643 else
5644 {
5645 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5646 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5647 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5648 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5649 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5650 else
5651 {
5652 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5653 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5654 }
5655 if (idxRegIndex != UINT8_MAX)
5656 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5657 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5658 }
5659
5660#else
5661# error "port me"
5662#endif
5663
5664 if (idxRegIndex != UINT8_MAX)
5665 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5666 iemNativeRegFreeTmp(pReNative, idxRegBase);
5667 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5668 return off;
5669}
5670
5671
5672#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5673 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5674
5675/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5676 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5677DECL_INLINE_THROW(uint32_t)
5678iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5679 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5680{
5681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5682
5683 /*
5684 * Handle the disp32 form with no registers first.
5685 *
5686 * Convert to an immediate value, as that'll delay the register allocation
5687 * and assignment till the memory access / call / whatever and we can use
5688 * a more appropriate register (or none at all).
5689 */
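    /* Example (added): "mov ecx, [0x12345678]" with a 32-bit address size has
       mod=00 and r/m=101, so this matches and the 32-bit displacement becomes
       the constant result. */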
5690 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5691 {
5692 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5693 return off;
5694 }
5695
5696 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
5697 uint32_t u32EffAddr = 0;
5698 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5699 {
5700 case 0: break;
5701 case 1: u32EffAddr = (int8_t)u32Disp; break;
5702 case 2: u32EffAddr = u32Disp; break;
5703 default: AssertFailed();
5704 }
5705
5706 /* Get the register (or SIB) value. */
5707 uint8_t idxGstRegBase = UINT8_MAX;
5708 uint8_t idxGstRegIndex = UINT8_MAX;
5709 uint8_t cShiftIndex = 0;
5710 switch (bRm & X86_MODRM_RM_MASK)
5711 {
5712 case 0: idxGstRegBase = X86_GREG_xAX; break;
5713 case 1: idxGstRegBase = X86_GREG_xCX; break;
5714 case 2: idxGstRegBase = X86_GREG_xDX; break;
5715 case 3: idxGstRegBase = X86_GREG_xBX; break;
5716 case 4: /* SIB */
5717 {
5718 /* index w/ scaling. */
5719 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5720 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5721 {
5722 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5723 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5724 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5725 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5726 case 4: cShiftIndex = 0; /*no index*/ break;
5727 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5728 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5729 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5730 }
5731
5732 /* base */
5733 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5734 {
5735 case 0: idxGstRegBase = X86_GREG_xAX; break;
5736 case 1: idxGstRegBase = X86_GREG_xCX; break;
5737 case 2: idxGstRegBase = X86_GREG_xDX; break;
5738 case 3: idxGstRegBase = X86_GREG_xBX; break;
5739 case 4:
5740 idxGstRegBase = X86_GREG_xSP;
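                    /* Added note: the upper bits of uSibAndRspOffset carry the
                       fixed xSP offset used for the pop [xSP] case (see the
                       64-bit variant below). */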
5741 u32EffAddr += uSibAndRspOffset >> 8;
5742 break;
5743 case 5:
5744 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5745 idxGstRegBase = X86_GREG_xBP;
5746 else
5747 {
5748 Assert(u32EffAddr == 0);
5749 u32EffAddr = u32Disp;
5750 }
5751 break;
5752 case 6: idxGstRegBase = X86_GREG_xSI; break;
5753 case 7: idxGstRegBase = X86_GREG_xDI; break;
5754 }
5755 break;
5756 }
5757 case 5: idxGstRegBase = X86_GREG_xBP; break;
5758 case 6: idxGstRegBase = X86_GREG_xSI; break;
5759 case 7: idxGstRegBase = X86_GREG_xDI; break;
5760 }
5761
5762 /*
5763 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5764 * the start of the function.
5765 */
5766 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5767 {
5768 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5769 return off;
5770 }
5771
5772 /*
5773 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5774 */
5775 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5776 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5777 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5778 kIemNativeGstRegUse_ReadOnly);
5779 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5780 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5781 kIemNativeGstRegUse_ReadOnly);
5782
5783 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5784 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5785 {
5786 idxRegBase = idxRegIndex;
5787 idxRegIndex = UINT8_MAX;
5788 }
5789
5790#ifdef RT_ARCH_AMD64
5791 if (idxRegIndex == UINT8_MAX)
5792 {
5793 if (u32EffAddr == 0)
5794 {
5795 /* mov ret, base */
5796 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5797 }
5798 else
5799 {
5800 /* lea ret32, [base64 + disp32] */
5801 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5802 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5803 if (idxRegRet >= 8 || idxRegBase >= 8)
5804 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5805 pbCodeBuf[off++] = 0x8d;
5806 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5807 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5808 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5809 else
5810 {
5811 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5812 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5813 }
5814 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5815 if (bMod == X86_MOD_MEM4)
5816 {
5817 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5818 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5819 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5820 }
5821 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5822 }
5823 }
5824 else
5825 {
5826 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5827 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5828 if (idxRegBase == UINT8_MAX)
5829 {
5830 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5831 if (idxRegRet >= 8 || idxRegIndex >= 8)
5832 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5833 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5834 pbCodeBuf[off++] = 0x8d;
5835 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5836 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5837 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5838 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5839 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5840 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5841 }
5842 else
5843 {
5844 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5845 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5846 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5847 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5848 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5849 pbCodeBuf[off++] = 0x8d;
5850 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5851 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5852 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5853 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5854 if (bMod != X86_MOD_MEM0)
5855 {
5856 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5857 if (bMod == X86_MOD_MEM4)
5858 {
5859 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5860 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5861 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5862 }
5863 }
5864 }
5865 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5866 }
5867
5868#elif defined(RT_ARCH_ARM64)
5869 if (u32EffAddr == 0)
5870 {
5871 if (idxRegIndex == UINT8_MAX)
5872 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5873 else if (idxRegBase == UINT8_MAX)
5874 {
5875 if (cShiftIndex == 0)
5876 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5877 else
5878 {
5879 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5880 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5881 }
5882 }
5883 else
5884 {
5885 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5886 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5887 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5888 }
5889 }
5890 else
5891 {
5892 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5893 {
5894 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5895 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5896 }
5897 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5898 {
5899 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5900 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5901 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5902 }
5903 else
5904 {
5905 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5906 if (idxRegBase != UINT8_MAX)
5907 {
5908 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5909 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5910 }
5911 }
5912 if (idxRegIndex != UINT8_MAX)
5913 {
5914 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5915 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5916 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5917 }
5918 }
5919
5920#else
5921# error "port me"
5922#endif
5923
5924 if (idxRegIndex != UINT8_MAX)
5925 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5926 if (idxRegBase != UINT8_MAX)
5927 iemNativeRegFreeTmp(pReNative, idxRegBase);
5928 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5929 return off;
5930}
5931
5932
5933#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5934 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5935 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5936
5937#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5938 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5939 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5940
5941#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5942 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5943 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5944
5945/**
5946 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5947 *
5948 * @returns New off.
5949 * @param pReNative The native recompile state.
5950 * @param off The current instruction buffer offset.
5951 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5952 * bit 4 to REX.X. The two bits are part of the
5953 * REG sub-field, which isn't needed in this
5954 * function.
5955 * @param uSibAndRspOffset Two parts:
5956 * - The first 8 bits make up the SIB byte.
5957 * - The next 8 bits are the fixed RSP/ESP offset
5958 * in case of a pop [xSP].
5959 * @param u32Disp The displacement byte/word/dword, if any.
5960 * @param cbInstr The size of the fully decoded instruction. Used
5961 * for RIP relative addressing.
5962 * @param idxVarRet The result variable number.
5963 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5964 * when calculating the address.
5965 *
5966 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5967 */
5968DECL_INLINE_THROW(uint32_t)
5969iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5970 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5971{
5972 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5973
5974 /*
5975 * Special case the rip + disp32 form first.
5976 */
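    /* Added note: mod=0 with r/m=101 in 64-bit mode selects RIP-relative
       addressing; the base is the address of the next instruction, which is
       why cbInstr is added to the PC value below. */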
5977 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5978 {
5979#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5980 /* Need to take the current PC offset into account for the displacement; no need to flush here
5981 * as the PC is only read and there is no branching or calling of helpers involved. */
5982 u32Disp += pReNative->Core.offPc;
5983#endif
5984
5985 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5986 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5987 kIemNativeGstRegUse_ReadOnly);
5988#ifdef RT_ARCH_AMD64
5989 if (f64Bit)
5990 {
5991 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5992 if ((int32_t)offFinalDisp == offFinalDisp)
5993 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5994 else
5995 {
5996 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5997 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5998 }
5999 }
6000 else
6001 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
6002
6003#elif defined(RT_ARCH_ARM64)
6004 if (f64Bit)
6005 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6006 (int64_t)(int32_t)u32Disp + cbInstr);
6007 else
6008 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6009 (int32_t)u32Disp + cbInstr);
6010
6011#else
6012# error "Port me!"
6013#endif
6014 iemNativeRegFreeTmp(pReNative, idxRegPc);
6015 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6016 return off;
6017 }
6018
6019 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
6020 int64_t i64EffAddr = 0;
6021 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6022 {
6023 case 0: break;
6024 case 1: i64EffAddr = (int8_t)u32Disp; break;
6025 case 2: i64EffAddr = (int32_t)u32Disp; break;
6026 default: AssertFailed();
6027 }
6028
6029 /* Get the register (or SIB) value. */
6030 uint8_t idxGstRegBase = UINT8_MAX;
6031 uint8_t idxGstRegIndex = UINT8_MAX;
6032 uint8_t cShiftIndex = 0;
6033 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6034 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6035 else /* SIB: */
6036 {
6037 /* index w/ scaling. */
6038 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6039 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6040 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6041 if (idxGstRegIndex == 4)
6042 {
6043 /* no index */
6044 cShiftIndex = 0;
6045 idxGstRegIndex = UINT8_MAX;
6046 }
6047
6048 /* base */
6049 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6050 if (idxGstRegBase == 4)
6051 {
6052 /* pop [rsp] hack */
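            /* Example (added, hypothetical values): uSibAndRspOffset 0x0824
               encodes SIB byte 0x24 (base=rSP, no index) with a fixed rSP
               offset of 8 to be added to the effective address. */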
6053 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6054 }
6055 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6056 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6057 {
6058 /* mod=0 and base=5 -> disp32, no base reg. */
6059 Assert(i64EffAddr == 0);
6060 i64EffAddr = (int32_t)u32Disp;
6061 idxGstRegBase = UINT8_MAX;
6062 }
6063 }
6064
6065 /*
6066 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6067 * the start of the function.
6068 */
6069 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6070 {
6071 if (f64Bit)
6072 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6073 else
6074 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6075 return off;
6076 }
6077
6078 /*
6079 * Now emit code that calculates:
6080 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6081 * or if !f64Bit:
6082 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6083 */
6084 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6085 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6086 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6087 kIemNativeGstRegUse_ReadOnly);
6088 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6089 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6090 kIemNativeGstRegUse_ReadOnly);
6091
6092 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6093 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6094 {
6095 idxRegBase = idxRegIndex;
6096 idxRegIndex = UINT8_MAX;
6097 }
6098
6099#ifdef RT_ARCH_AMD64
6100 uint8_t bFinalAdj;
6101 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6102 bFinalAdj = 0; /* likely */
6103 else
6104 {
6105 /* pop [rsp] with a problematic disp32 value. Split out the
6106 RSP offset and add it separately afterwards (bFinalAdj). */
6107 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6108 Assert(idxGstRegBase == X86_GREG_xSP);
6109 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6110 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6111 Assert(bFinalAdj != 0);
6112 i64EffAddr -= bFinalAdj;
6113 Assert((int32_t)i64EffAddr == i64EffAddr);
6114 }
6115 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6116//pReNative->pInstrBuf[off++] = 0xcc;
6117
6118 if (idxRegIndex == UINT8_MAX)
6119 {
6120 if (u32EffAddr == 0)
6121 {
6122 /* mov ret, base */
6123 if (f64Bit)
6124 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6125 else
6126 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6127 }
6128 else
6129 {
6130 /* lea ret, [base + disp32] */
6131 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6132 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6133 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6134 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6135 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6136 | (f64Bit ? X86_OP_REX_W : 0);
6137 pbCodeBuf[off++] = 0x8d;
6138 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6139 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6140 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6141 else
6142 {
6143 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6144 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6145 }
6146 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6147 if (bMod == X86_MOD_MEM4)
6148 {
6149 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6150 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6151 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6152 }
6153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6154 }
6155 }
6156 else
6157 {
6158 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6160 if (idxRegBase == UINT8_MAX)
6161 {
6162 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6163 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6164 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6165 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6166 | (f64Bit ? X86_OP_REX_W : 0);
6167 pbCodeBuf[off++] = 0x8d;
6168 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6169 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6170 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6171 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6172 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6173 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6174 }
6175 else
6176 {
6177 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6178 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6179 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6180 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6181 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6182 | (f64Bit ? X86_OP_REX_W : 0);
6183 pbCodeBuf[off++] = 0x8d;
6184 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6185 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6186 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6187 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6188 if (bMod != X86_MOD_MEM0)
6189 {
6190 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6191 if (bMod == X86_MOD_MEM4)
6192 {
6193 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6194 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6195 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6196 }
6197 }
6198 }
6199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6200 }
6201
6202 if (!bFinalAdj)
6203 { /* likely */ }
6204 else
6205 {
6206 Assert(f64Bit);
6207 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6208 }
6209
6210#elif defined(RT_ARCH_ARM64)
6211 if (i64EffAddr == 0)
6212 {
6213 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6214 if (idxRegIndex == UINT8_MAX)
6215 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6216 else if (idxRegBase != UINT8_MAX)
6217 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6218 f64Bit, false /*fSetFlags*/, cShiftIndex);
6219 else
6220 {
6221 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6222 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6223 }
6224 }
6225 else
6226 {
6227 if (f64Bit)
6228 { /* likely */ }
6229 else
6230 i64EffAddr = (int32_t)i64EffAddr;
6231
6232 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6233 {
6234 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6235 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6236 }
6237 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6238 {
6239 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6240 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6241 }
6242 else
6243 {
6244 if (f64Bit)
6245 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6246 else
6247 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6248 if (idxRegBase != UINT8_MAX)
6249 {
6250 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6251 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6252 }
6253 }
6254 if (idxRegIndex != UINT8_MAX)
6255 {
6256 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6257 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6258 f64Bit, false /*fSetFlags*/, cShiftIndex);
6259 }
6260 }
6261
6262#else
6263# error "port me"
6264#endif
6265
6266 if (idxRegIndex != UINT8_MAX)
6267 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6268 if (idxRegBase != UINT8_MAX)
6269 iemNativeRegFreeTmp(pReNative, idxRegBase);
6270 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6271 return off;
6272}
6273
6274
6275/*********************************************************************************************************************************
6276* Memory fetches and stores common *
6277*********************************************************************************************************************************/
6278
6279typedef enum IEMNATIVEMITMEMOP
6280{
6281 kIemNativeEmitMemOp_Store = 0,
6282 kIemNativeEmitMemOp_Fetch,
6283 kIemNativeEmitMemOp_Fetch_Zx_U16,
6284 kIemNativeEmitMemOp_Fetch_Zx_U32,
6285 kIemNativeEmitMemOp_Fetch_Zx_U64,
6286 kIemNativeEmitMemOp_Fetch_Sx_U16,
6287 kIemNativeEmitMemOp_Fetch_Sx_U32,
6288 kIemNativeEmitMemOp_Fetch_Sx_U64
6289} IEMNATIVEMITMEMOP;
6290
6291/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6292 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6293 * (with iSegReg = UINT8_MAX). */
6294DECL_INLINE_THROW(uint32_t)
6295iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6296 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6297 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6298{
6299 /*
6300 * Assert sanity.
6301 */
6302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6303 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6304 Assert( enmOp != kIemNativeEmitMemOp_Store
6305 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6306 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6307 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6308 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6309 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6310 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6311 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6312 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6313#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6314 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6315 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6316#else
6317 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6318#endif
6319 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6320 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6321#ifdef VBOX_STRICT
6322 if (iSegReg == UINT8_MAX)
6323 {
6324 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6325 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6326 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6327 switch (cbMem)
6328 {
6329 case 1:
6330 Assert( pfnFunction
6331 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6332 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6333 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6334 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6335 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6336 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6337 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6338 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6339 : UINT64_C(0xc000b000a0009000) ));
6340 Assert(!fAlignMaskAndCtl);
6341 break;
6342 case 2:
6343 Assert( pfnFunction
6344 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6345 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6346 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6347 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6348 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6349 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6350 : UINT64_C(0xc000b000a0009000) ));
6351 Assert(fAlignMaskAndCtl <= 1);
6352 break;
6353 case 4:
6354 Assert( pfnFunction
6355 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6356 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6357 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6358 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6359 : UINT64_C(0xc000b000a0009000) ));
6360 Assert(fAlignMaskAndCtl <= 3);
6361 break;
6362 case 8:
6363 Assert( pfnFunction
6364 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6365 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6366 : UINT64_C(0xc000b000a0009000) ));
6367 Assert(fAlignMaskAndCtl <= 7);
6368 break;
6369#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6370 case sizeof(RTUINT128U):
6371 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6372 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6373 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6374 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6375 || ( enmOp == kIemNativeEmitMemOp_Store
6376 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6377 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6378 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6379 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6380 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6381 : fAlignMaskAndCtl <= 15);
6382 break;
6383 case sizeof(RTUINT256U):
6384 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6385 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6386 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6387 || ( enmOp == kIemNativeEmitMemOp_Store
6388 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6389 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6390 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6391 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6392 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6393 : fAlignMaskAndCtl <= 31);
6394 break;
6395#endif
6396 }
6397 }
6398 else
6399 {
6400 Assert(iSegReg < 6);
6401 switch (cbMem)
6402 {
6403 case 1:
6404 Assert( pfnFunction
6405 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6406 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6407 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6408 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6409 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6410 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6411 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6412 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6413 : UINT64_C(0xc000b000a0009000) ));
6414 Assert(!fAlignMaskAndCtl);
6415 break;
6416 case 2:
6417 Assert( pfnFunction
6418 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6419 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6420 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6421 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6422 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6423 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6424 : UINT64_C(0xc000b000a0009000) ));
6425 Assert(fAlignMaskAndCtl <= 1);
6426 break;
6427 case 4:
6428 Assert( pfnFunction
6429 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6430 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6431 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6432 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6433 : UINT64_C(0xc000b000a0009000) ));
6434 Assert(fAlignMaskAndCtl <= 3);
6435 break;
6436 case 8:
6437 Assert( pfnFunction
6438 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6439 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6440 : UINT64_C(0xc000b000a0009000) ));
6441 Assert(fAlignMaskAndCtl <= 7);
6442 break;
6443#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6444 case sizeof(RTUINT128U):
6445 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6446 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6447 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6448 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6449 || ( enmOp == kIemNativeEmitMemOp_Store
6450 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6451 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6452 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6453 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6454 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6455 : fAlignMaskAndCtl <= 15);
6456 break;
6457 case sizeof(RTUINT256U):
6458 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6459 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6460 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6461 || ( enmOp == kIemNativeEmitMemOp_Store
6462 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6463 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6464 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6465 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6466 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6467 : fAlignMaskAndCtl <= 31);
6468 break;
6469#endif
6470 }
6471 }
6472#endif
6473
6474#ifdef VBOX_STRICT
6475 /*
6476 * Check that the fExec flags we've got make sense.
6477 */
6478 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6479#endif
6480
6481 /*
6482 * To keep things simple we have to commit any pending writes first as we
6483 * may end up making calls.
6484 */
6485 /** @todo we could postpone this till we make the call and reload the
6486 * registers after returning from the call. Not sure if that's sensible or
6487 * not, though. */
6488#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6489 off = iemNativeRegFlushPendingWrites(pReNative, off);
6490#else
6491 /* The program counter is treated differently for now. */
6492 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6493#endif
6494
6495#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6496 /*
6497 * Move/spill/flush stuff out of call-volatile registers.
6498 * This is the easy way out. We could contain this to the tlb-miss branch
6499 * by saving and restoring active stuff here.
6500 */
6501 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6502#endif
6503
6504 /*
6505 * Define labels and allocate the result register (trying for the return
6506 * register if we can).
6507 */
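    /* Note: picking IEMNATIVE_CALL_RET_GREG for the fetch result when it is free means the
       TlbMiss path below can skip the extra register move after the helper call returns. */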
6508 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6509#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6510 uint8_t idxRegValueFetch = UINT8_MAX;
6511
6512 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6513 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6514 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6515 else
6516 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6517 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6518 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6519 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6520#else
6521 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6522 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6523 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6524 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6525#endif
6526 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6527
6528#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6529 uint8_t idxRegValueStore = UINT8_MAX;
6530
6531 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6532 idxRegValueStore = !TlbState.fSkip
6533 && enmOp == kIemNativeEmitMemOp_Store
6534 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6535 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6536 : UINT8_MAX;
6537 else
6538 idxRegValueStore = !TlbState.fSkip
6539 && enmOp == kIemNativeEmitMemOp_Store
6540 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6541 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6542 : UINT8_MAX;
6543
6544#else
6545 uint8_t const idxRegValueStore = !TlbState.fSkip
6546 && enmOp == kIemNativeEmitMemOp_Store
6547 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6548 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6549 : UINT8_MAX;
6550#endif
6551 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
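    /* idxRegMemResult receives the host address produced by the TLB lookup code and is used
       as the memory address register for the inlined loads/stores on the hit path below. */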
6552 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6553 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6554 : UINT32_MAX;
6555
6556 /*
6557 * Jump to the TLB lookup code.
6558 */
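    /* The TlbLookup code itself is emitted after the TlbMiss path (see the
       IEMNATIVE_WITH_TLB_LOOKUP block further down); the lookup emitter is handed both
       labels, so a miss branches back to the TlbMiss label while a hit falls through into
       the inlined load/store code and then reaches TlbDone. */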
6559 if (!TlbState.fSkip)
6560 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6561
6562 /*
6563 * TlbMiss:
6564 *
6565 * Call helper to do the fetching / storing.
6566 * We flush all guest register shadow copies here.
6567 */
6568 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6569
6570#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6571 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6572#else
6573 RT_NOREF(idxInstr);
6574#endif
6575
6576#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6577 if (pReNative->Core.offPc)
6578 {
6579 /*
6580 * Update the program counter but restore it at the end of the TlbMiss branch.
6581 * This should allow delaying more program counter updates for the TlbLookup and hit paths
6582 * which are hopefully much more frequent, reducing the amount of memory accesses.
6583 */
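        /* (Core.offPc holds the RIP advance that has not been flushed to CPUMCTX yet, see
            IEMNATIVE_WITH_DELAYED_PC_UPDATING, so rip + offPc is the up-to-date guest PC at
            this point; the TlbMiss epilogue below subtracts it again to restore the delayed
            state.) */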
6584 /* Allocate a temporary PC register. */
6585 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6586
6587 /* Perform the addition and store the result. */
6588 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6589 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6590
6591 /* Free and flush the PC register. */
6592 iemNativeRegFreeTmp(pReNative, idxPcReg);
6593 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6594 }
6595#endif
6596
6597#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6598 /* Save variables in volatile registers. */
6599 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6600 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6601 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6602 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6603#endif
6604
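    /* Load the helper call arguments: pVCpu in ARG0, GCPtrMem in ARG1, iSegReg in ARG2 for
       the segmented variants, and the value (or a reference to it for SIMD variables) in
       ARG2/ARG3 depending on whether a segment register argument is present. */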
6605 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6606 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6607#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6608 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6609 {
6610 /*
6611 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6612 *
6613 * @note There was a host register assigned to the variable for the TlbLookup case above
6614 * which must not be freed here, or the value loaded into that register will not be synced
6615 * further down the road because the variable would no longer know it has a register assigned.
6616 *
6617 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6618 * as it will be overwritten anyway.
6619 */
6620 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6621 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6622 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6623 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6624 }
6625 else
6626#endif
6627 if (enmOp == kIemNativeEmitMemOp_Store)
6628 {
6629 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6630 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6631#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6632 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6633#else
6634 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInVolatileRegs*/);
6635 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6636#endif
6637 }
6638
6639 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6640 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6641#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6642 fVolGregMask);
6643#else
6644 fVolGregMask, true /*fSpilledVarsInVolatileRegs*/);
6645#endif
6646
6647 if (iSegReg != UINT8_MAX)
6648 {
6649 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6650 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6651 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6652 }
6653
6654 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6655 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6656
6657 /* Done setting up parameters, make the call. */
6658 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6659
6660 /*
6661 * Put the result in the right register if this is a fetch.
6662 */
6663 if (enmOp != kIemNativeEmitMemOp_Store)
6664 {
6665#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6666 if ( cbMem == sizeof(RTUINT128U)
6667 || cbMem == sizeof(RTUINT256U))
6668 {
6669 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6670
6671 /* Sync the value on the stack with the host register assigned to the variable. */
6672 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6673 }
6674 else
6675#endif
6676 {
6677 Assert(idxRegValueFetch == pVarValue->idxReg);
6678 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6679 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6680 }
6681 }
6682
6683#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6684 /* Restore variables and guest shadow registers to volatile registers. */
6685 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6686 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6687#endif
6688
6689#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6690 if (pReNative->Core.offPc)
6691 {
6692 /*
6693 * Time to restore the program counter to its original value.
6694 */
6695 /* Allocate a temporary PC register. */
6696 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6697 kIemNativeGstRegUse_ForUpdate);
6698
6699 /* Restore the original value. */
6700 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6701 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6702
6703 /* Free and flush the PC register. */
6704 iemNativeRegFreeTmp(pReNative, idxPcReg);
6705 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6706 }
6707#endif
6708
6709#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6710 if (!TlbState.fSkip)
6711 {
6712 /* end of TlbMiss - Jump to the done label. */
6713 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6714 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6715
6716 /*
6717 * TlbLookup:
6718 */
6719 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6720 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6721 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6722
6723 /*
6724 * Emit code to do the actual storing / fetching.
6725 */
6726 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6727# ifdef IEM_WITH_TLB_STATISTICS
6728 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6729 enmOp == kIemNativeEmitMemOp_Store
6730 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6731 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6732# endif
6733 switch (enmOp)
6734 {
6735 case kIemNativeEmitMemOp_Store:
6736 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6737 {
6738 switch (cbMem)
6739 {
6740 case 1:
6741 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6742 break;
6743 case 2:
6744 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6745 break;
6746 case 4:
6747 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6748 break;
6749 case 8:
6750 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6751 break;
6752#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6753 case sizeof(RTUINT128U):
6754 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6755 break;
6756 case sizeof(RTUINT256U):
6757 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6758 break;
6759#endif
6760 default:
6761 AssertFailed();
6762 }
6763 }
6764 else
6765 {
6766 switch (cbMem)
6767 {
6768 case 1:
6769 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6770 idxRegMemResult, TlbState.idxReg1);
6771 break;
6772 case 2:
6773 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6774 idxRegMemResult, TlbState.idxReg1);
6775 break;
6776 case 4:
6777 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6778 idxRegMemResult, TlbState.idxReg1);
6779 break;
6780 case 8:
6781 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6782 idxRegMemResult, TlbState.idxReg1);
6783 break;
6784 default:
6785 AssertFailed();
6786 }
6787 }
6788 break;
6789
6790 case kIemNativeEmitMemOp_Fetch:
6791 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6792 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6793 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6794 switch (cbMem)
6795 {
6796 case 1:
6797 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6798 break;
6799 case 2:
6800 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6801 break;
6802 case 4:
6803 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6804 break;
6805 case 8:
6806 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6807 break;
6808#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6809 case sizeof(RTUINT128U):
6810 /*
6811 * No need to sync the register back to the stack here; this is done by the generic variable
6812 * handling code if there is a register assigned to the variable and the stack must be accessed.
6813 */
6814 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6815 break;
6816 case sizeof(RTUINT256U):
6817 /*
6818 * No need to sync the register back to the stack here; this is done by the generic variable
6819 * handling code if there is a register assigned to the variable and the stack must be accessed.
6820 */
6821 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6822 break;
6823#endif
6824 default:
6825 AssertFailed();
6826 }
6827 break;
6828
6829 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6830 Assert(cbMem == 1);
6831 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6832 break;
6833
6834 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6835 Assert(cbMem == 1 || cbMem == 2);
6836 if (cbMem == 1)
6837 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6838 else
6839 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6840 break;
6841
6842 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6843 switch (cbMem)
6844 {
6845 case 1:
6846 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6847 break;
6848 case 2:
6849 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6850 break;
6851 case 4:
6852 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6853 break;
6854 default:
6855 AssertFailed();
6856 }
6857 break;
6858
6859 default:
6860 AssertFailed();
6861 }
6862
6863 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6864
6865 /*
6866 * TlbDone:
6867 */
6868 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6869
6870 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6871
6872# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6873 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6874 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6875# endif
6876 }
6877#else
6878 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6879#endif
6880
6881 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6882 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6883 return off;
6884}
6885
6886
6887
6888/*********************************************************************************************************************************
6889* Memory fetches (IEM_MEM_FETCH_XXX). *
6890*********************************************************************************************************************************/
6891
6892/* 8-bit segmented: */
6893#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6894 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6895 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6896 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6897
6898#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6900 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6901 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6902
6903#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6904 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6905 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6906 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6907
6908#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6909 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6910 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6911 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6912
6913#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6915 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6916 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6917
6918#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6919 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6920 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6921 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6922
6923#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6924 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6925 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6926 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6927
6928/* 16-bit segmented: */
6929#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6930 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6931 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6932 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6933
6934#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6935 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6936 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6937 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6938
6939#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6940 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6941 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6942 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6943
6944#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6945 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6946 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6947 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6948
6949#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6950 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6951 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6952 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6953
6954#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6955 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6956 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6957 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6958
6959
6960/* 32-bit segmented: */
6961#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6962 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6963 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6964 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6965
6966#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6968 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6969 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6970
6971#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6972 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6973 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6974 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6975
6976#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6977 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6978 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6979 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6980
6981#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6982 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6983 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6984 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6985
6986#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6987 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6988 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6989 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6990
6991#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6993 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6994 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6995
6996AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6997#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6998 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6999 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7000 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7001
7002
7003/* 64-bit segmented: */
7004#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7005 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7006 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7007 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7008
7009AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7010#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7012 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7013 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7014
7015
7016/* 8-bit flat: */
7017#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7018 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7019 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7020 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7021
7022#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7023 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7024 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7025 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7026
7027#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7028 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7029 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7030 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7031
7032#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7033 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7034 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7035 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7036
7037#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7038 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7039 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7040 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7041
7042#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7043 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7044 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7045 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7046
7047#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7048 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7049 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7050 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7051
7052
7053/* 16-bit flat: */
7054#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7055 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7056 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7057 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7058
7059#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7061 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7062 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7063
7064#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7066 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7067 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7068
7069#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7071 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7072 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7073
7074#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7076 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7077 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7078
7079#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7080 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7081 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7082 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7083
7084/* 32-bit flat: */
7085#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7086 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7087 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7088 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7089
7090#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7091 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7092 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7093 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7094
7095#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7096 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7097 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7098 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7099
7100#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7101 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7102 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7103 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7104
7105#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7106 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7107 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7108 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7109
7110#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7111 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7112 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7113 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7114
7115#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7117 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7118 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7119
7120#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7122 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7123 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7124
7125
7126/* 64-bit flat: */
7127#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7128 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7129 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7130 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7131
7132#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7133 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7134 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7135 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7136
7137#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7138/* 128-bit segmented: */
7139#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7140 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7141 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7142 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7143
7144#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7145 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7146 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7147 kIemNativeEmitMemOp_Fetch, \
7148 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7149
7150AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7151#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7152 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7153 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7154 kIemNativeEmitMemOp_Fetch, \
7155 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7156
7157#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7159 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7160 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7161
7162#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7164 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7165 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7166
7167
7168/* 128-bit flat: */
7169#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7170 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7171 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7172 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7173
7174#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7175 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7176 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7177 kIemNativeEmitMemOp_Fetch, \
7178 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7179
7180#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7181 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7182 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7183 kIemNativeEmitMemOp_Fetch, \
7184 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7185
7186#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7187 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7188 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7189 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7190
7191#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7192 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7193 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7194 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7195
7196/* 256-bit segmented: */
7197#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7198 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7199 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7200 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7201
7202#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7203 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7204 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7205 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7206
7207#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7208 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7209 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7210 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7211
7212#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7213 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7214 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7215 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7216
7217
7218/* 256-bit flat: */
7219#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7220 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7221 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7222 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7223
7224#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7225 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7226 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7227 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7228
7229#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7230 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7231 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7232 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7233
7234#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7235 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7236 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7237 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7238
7239#endif
7240
7241
7242/*********************************************************************************************************************************
7243* Memory stores (IEM_MEM_STORE_XXX). *
7244*********************************************************************************************************************************/
7245
7246#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7247 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7248 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7249 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7250
7251#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7252 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7253 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7254 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7255
7256#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7257 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7258 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7259 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7260
7261#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7262 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7263 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7264 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7265
7266
7267#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7268 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7269 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7270 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7271
7272#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7273 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7274 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7275 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7276
7277#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7278 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7279 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7280 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7281
7282#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7283 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7284 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7285 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7286
7287
7288#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7289 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7290 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7291
7292#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7293 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7294 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7295
7296#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7297 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7298 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7299
7300#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7301 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7302 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7303
7304
7305#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7306 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7307 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7308
7309#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7310 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7311 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7312
7313#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7314 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7315 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7316
7317#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7318 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7319 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7320
7321/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7322 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7323DECL_INLINE_THROW(uint32_t)
7324iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7325 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7326{
7327 /*
7328 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7329 * to do the grunt work.
7330 */
7331 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7332 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7333 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7334 pfnFunction, idxInstr);
7335 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7336 return off;
7337}
7338
7339
7340#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7341# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7342 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7343 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7344 kIemNativeEmitMemOp_Store, \
7345 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7346
7347# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7348 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7349 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7350 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7351
7352# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7353 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7354 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7355 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7356
7357# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7358 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7359 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7360 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7361
7362
7363# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7364 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7365 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7366 kIemNativeEmitMemOp_Store, \
7367 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7368
7369# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7370 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7371 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7372 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7373
7374# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7375 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7376 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7377 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7378
7379# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7380 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7381 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7382 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7383#endif
7384
7385
7386
7387/*********************************************************************************************************************************
7388* Stack Accesses. *
7389*********************************************************************************************************************************/
7390/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
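/* Example: IEM_MC_FLAT64_PUSH_U16 below packs (16, 64, 0, 0), i.e. a 16-bit value pushed with a
   flat 64-bit stack pointer; fSReg = 1 marks the segment register pushes, which get the special
   16-bit access handling in iemNativeEmitStackPush. */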
7391#define IEM_MC_PUSH_U16(a_u16Value) \
7392 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7393 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7394#define IEM_MC_PUSH_U32(a_u32Value) \
7395 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7396 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7397#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7398 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7399 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7400#define IEM_MC_PUSH_U64(a_u64Value) \
7401 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7402 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7403
7404#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7405 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7406 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7407#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7408 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7409 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7410#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7411 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7412 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7413
7414#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7415 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7416 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7417#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7418 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7419 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7420
7421
7422/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7423DECL_INLINE_THROW(uint32_t)
7424iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7425 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7426{
7427 /*
7428 * Assert sanity.
7429 */
7430 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7431 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7432#ifdef VBOX_STRICT
7433 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7434 {
7435 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7436 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7437 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7438 Assert( pfnFunction
7439 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7440 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7441 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7442 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7443 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7444 : UINT64_C(0xc000b000a0009000) ));
7445 }
7446 else
7447 Assert( pfnFunction
7448 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7449 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7450 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7451 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7452 : UINT64_C(0xc000b000a0009000) ));
7453#endif
7454
7455#ifdef VBOX_STRICT
7456 /*
7457 * Check that the fExec flags we've got make sense.
7458 */
7459 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7460#endif
7461
7462 /*
7463 * To keep things simple we have to commit any pending writes first as we
7464 * may end up making calls.
7465 */
7466 /** @todo we could postpone this till we make the call and reload the
7467 * registers after returning from the call. Not sure if that's sensible or
7468 * not, though. */
7469 off = iemNativeRegFlushPendingWrites(pReNative, off);
7470
7471 /*
7472 * First we calculate the new RSP and the effective stack pointer value.
7473 * For 64-bit mode and flat 32-bit these two are the same.
7474 * (Code structure is very similar to that of PUSH)
7475 */
7476 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7477 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7478 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
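    /* Note: for segment-register pushes on Intel CPUs outside 16-bit code only a word
       is actually written (cbMemAccess), even though RSP is still adjusted by the full
       operand size (cbMem).  The remaining 32-bit real-mode EFLAGS quirk is handled in
       the TLB-hit store code further down. */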
7479 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7480 ? cbMem : sizeof(uint16_t);
7481 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7482 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7483 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7484 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7485 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7486 if (cBitsFlat != 0)
7487 {
7488 Assert(idxRegEffSp == idxRegRsp);
7489 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7490 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7491 if (cBitsFlat == 64)
7492 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7493 else
7494 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7495 }
7496 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7497 {
7498 Assert(idxRegEffSp != idxRegRsp);
7499 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7500 kIemNativeGstRegUse_ReadOnly);
7501#ifdef RT_ARCH_AMD64
7502 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7503#else
7504 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7505#endif
7506 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7507 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7508 offFixupJumpToUseOtherBitSp = off;
7509 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7510 {
7511 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7512 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7513 }
7514 else
7515 {
7516 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7517 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7518 }
7519 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7520 }
7521 /* SpUpdateEnd: */
7522 uint32_t const offLabelSpUpdateEnd = off;
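    /* For the non-flat (segmented) case the alternative SP-width update sequence is
       emitted out of line below (Use16BitSp) and jumps back to this offset, so both
       stack-pointer widths continue with the same code that follows. */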
7523
7524 /*
7525 * Okay, now prepare for the TLB lookup and jump to the lookup code (or
7526 * straight to the TlbMiss code if we're skipping the lookup).
7527 */
7528 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7529 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7530 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7531 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7532 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7533 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7534 : UINT32_MAX;
7535 uint8_t const idxRegValue = !TlbState.fSkip
7536 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7537 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7538 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7539 : UINT8_MAX;
7540 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7541
7542
7543 if (!TlbState.fSkip)
7544 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7545 else
7546 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
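    /* When the TLB lookup is statically skipped (TlbState.fSkip), everything funnels
       through the TlbMiss helper-call path below and no TlbLookup/TlbDone code is
       emitted (see the IEMNATIVE_WITH_TLB_LOOKUP block). */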
7547
7548 /*
7549 * Use16BitSp:
7550 */
7551 if (cBitsFlat == 0)
7552 {
7553#ifdef RT_ARCH_AMD64
7554 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7555#else
7556 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7557#endif
7558 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7559 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7560 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7561 else
7562 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7563 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7565 }
7566
7567 /*
7568 * TlbMiss:
7569 *
7570 * Call helper to do the pushing.
7571 */
7572 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7573
7574#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7575 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7576#else
7577 RT_NOREF(idxInstr);
7578#endif
7579
7580 /* Save variables in volatile registers. */
7581 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7582 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7583 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7584 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7585 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7586
7587 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7588 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7589 {
7590 /* Swap them using ARG0 as temp register: */
7591 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7592 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7593 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7594 }
7595 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7596 {
7597 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7598 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7599 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7600
7601 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7602 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7603 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7604 }
7605 else
7606 {
7607 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7608 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7609
7610 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7611 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7612 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7613 }
7614
7615 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7616 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7617
7618 /* Done setting up parameters, make the call. */
7619 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7620
7621 /* Restore variables and guest shadow registers to volatile registers. */
7622 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7623 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7624
7625#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7626 if (!TlbState.fSkip)
7627 {
7628 /* end of TlbMiss - Jump to the done label. */
7629 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7630 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7631
7632 /*
7633 * TlbLookup:
7634 */
7635 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7636 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7637
7638 /*
7639 * Emit code to do the actual storing / fetching.
7640 */
7641 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7642# ifdef IEM_WITH_TLB_STATISTICS
7643 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7644 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7645# endif
7646 if (idxRegValue != UINT8_MAX)
7647 {
7648 switch (cbMemAccess)
7649 {
7650 case 2:
7651 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7652 break;
7653 case 4:
7654 if (!fIsIntelSeg)
7655 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7656 else
7657 {
7658 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
7659 PUSH FS in real mode, so we have to try to emulate that here.
7660 We borrow the now unused idxReg1 from the TLB lookup code here. */
7661 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7662 kIemNativeGstReg_EFlags);
7663 if (idxRegEfl != UINT8_MAX)
7664 {
7665#ifdef RT_ARCH_AMD64
7666 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7667 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7668 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7669#else
7670 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7671 off, TlbState.idxReg1, idxRegEfl,
7672 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7673#endif
7674 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7675 }
7676 else
7677 {
7678 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7679 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7680 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7681 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7682 }
7683 /* ASSUMES the upper half of idxRegValue is ZERO. */
7684 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7685 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7686 }
7687 break;
7688 case 8:
7689 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7690 break;
7691 default:
7692 AssertFailed();
7693 }
7694 }
7695 else
7696 {
7697 switch (cbMemAccess)
7698 {
7699 case 2:
7700 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7701 idxRegMemResult, TlbState.idxReg1);
7702 break;
7703 case 4:
7704 Assert(!fIsSegReg);
7705 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7706 idxRegMemResult, TlbState.idxReg1);
7707 break;
7708 case 8:
7709 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7710 break;
7711 default:
7712 AssertFailed();
7713 }
7714 }
7715
7716 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7717 TlbState.freeRegsAndReleaseVars(pReNative);
7718
7719 /*
7720 * TlbDone:
7721 *
7722 * Commit the new RSP value.
7723 */
7724 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7725 }
7726#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7727
7728#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7729 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7730#endif
7731 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7732 if (idxRegEffSp != idxRegRsp)
7733 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7734
7735 /* The value variable is implicitly flushed. */
7736 if (idxRegValue != UINT8_MAX)
7737 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7738 iemNativeVarFreeLocal(pReNative, idxVarValue);
7739
7740 return off;
7741}
7742
7743
7744
7745/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
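/* Same packing as for the PUSH variants above; RT_BYTE3 (the fSReg flag) is always
   zero for the pops emitted here. */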
7746#define IEM_MC_POP_GREG_U16(a_iGReg) \
7747 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7748 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7749#define IEM_MC_POP_GREG_U32(a_iGReg) \
7750 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7751 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7752#define IEM_MC_POP_GREG_U64(a_iGReg) \
7753 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7754 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7755
7756#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7757 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7758 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7759#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7760 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7761 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7762
7763#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7764 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7765 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7766#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7767 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7768 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7769
7770
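/** Emits the 16-bit SP flavour of the POP stack-pointer update: idxRegEffSp receives
 *  the current 16-bit SP to use as the load address, while only bits 15:0 of
 *  idxRegRsp are advanced by cbMem.  On ARM64 idxRegTmp holds the incremented value
 *  while it is masked to 16 bits and inserted back into RSP. */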
7771DECL_FORCE_INLINE_THROW(uint32_t)
7772iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7773 uint8_t idxRegTmp)
7774{
7775 /* Use16BitSp: */
7776#ifdef RT_ARCH_AMD64
7777 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7778 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7779 RT_NOREF(idxRegTmp);
7780#else
7781 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7782 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7783 /* add tmp, regrsp, #cbMem */
7784 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7785 /* and tmp, tmp, #0xffff */
7786 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7787 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7788 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7789 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7790#endif
7791 return off;
7792}
7793
7794
7795DECL_FORCE_INLINE(uint32_t)
7796iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7797{
7798 /* Use32BitSp: */
7799 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7800 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7801 return off;
7802}
7803
7804
7805/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7806DECL_INLINE_THROW(uint32_t)
7807iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7808 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7809{
7810 /*
7811 * Assert sanity.
7812 */
7813 Assert(idxGReg < 16);
7814#ifdef VBOX_STRICT
7815 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7816 {
7817 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7818 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7819 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7820 Assert( pfnFunction
7821 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7822 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7823 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7824 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7825 : UINT64_C(0xc000b000a0009000) ));
7826 }
7827 else
7828 Assert( pfnFunction
7829 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7830 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7831 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7832 : UINT64_C(0xc000b000a0009000) ));
7833#endif
7834
7835#ifdef VBOX_STRICT
7836 /*
7837 * Check that the fExec flags we've got make sense.
7838 */
7839 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7840#endif
7841
7842 /*
7843 * To keep things simple we have to commit any pending writes first as we
7844 * may end up making calls.
7845 */
7846 off = iemNativeRegFlushPendingWrites(pReNative, off);
7847
7848 /*
7849 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7850 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7851 * directly as the effective stack pointer.
7852 * (Code structure is very similar to that of PUSH)
7853 */
7854 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7855 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7856 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7857 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7858 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7859 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7860 * will be the resulting register value. */
7861 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
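    /* I.e. idxRegMemResult first receives the memory address from the TLB lookup (or
       the helper return value on a miss), is then overwritten with the loaded value,
       and on ARM64 it also doubles as the temporary for the 16-bit SP update helper. */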
7862
7863 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7864 if (cBitsFlat != 0)
7865 {
7866 Assert(idxRegEffSp == idxRegRsp);
7867 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7868 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7869 }
7870 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7871 {
7872 Assert(idxRegEffSp != idxRegRsp);
7873 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7874 kIemNativeGstRegUse_ReadOnly);
7875#ifdef RT_ARCH_AMD64
7876 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7877#else
7878 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7879#endif
7880 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7881 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7882 offFixupJumpToUseOtherBitSp = off;
7883 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7884 {
7885/** @todo can skip idxRegRsp updating when popping ESP. */
7886 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7887 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7888 }
7889 else
7890 {
7891 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7892 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7893 }
7894 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7895 }
7896 /* SpUpdateEnd: */
7897 uint32_t const offLabelSpUpdateEnd = off;
7898
7899 /*
7900 * Okay, now prepare for the TLB lookup and jump to the lookup code (or
7901 * straight to the TlbMiss code if we're skipping the lookup).
7902 */
7903 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7904 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7905 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7906 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7907 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7908 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7909 : UINT32_MAX;
7910
7911 if (!TlbState.fSkip)
7912 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7913 else
7914 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7915
7916 /*
7917 * Use16BitSp:
7918 */
7919 if (cBitsFlat == 0)
7920 {
7921#ifdef RT_ARCH_AMD64
7922 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7923#else
7924 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7925#endif
7926 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7927 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7928 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7929 else
7930 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7931 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7932 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7933 }
7934
7935 /*
7936 * TlbMiss:
7937 *
7938 * Call helper to do the popping.
7939 */
7940 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7941
7942#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7944#else
7945 RT_NOREF(idxInstr);
7946#endif
7947
7948 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7949 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7950 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7951 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7952
7953
7954 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7955 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7956 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7957
7958 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7959 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7960
7961 /* Done setting up parameters, make the call. */
7962 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7963
7964 /* Move the return register content to idxRegMemResult. */
7965 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7966 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7967
7968 /* Restore variables and guest shadow registers to volatile registers. */
7969 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7970 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7971
7972#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7973 if (!TlbState.fSkip)
7974 {
7975 /* end of TlbMiss - Jump to the done label. */
7976 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7977 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7978
7979 /*
7980 * TlbLookup:
7981 */
7982 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7983 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7984
7985 /*
7986 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
7987 */
7988 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7989# ifdef IEM_WITH_TLB_STATISTICS
7990 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7991 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7992# endif
7993 switch (cbMem)
7994 {
7995 case 2:
7996 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7997 break;
7998 case 4:
7999 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8000 break;
8001 case 8:
8002 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8003 break;
8004 default:
8005 AssertFailed();
8006 }
8007
8008 TlbState.freeRegsAndReleaseVars(pReNative);
8009
8010 /*
8011 * TlbDone:
8012 *
8013 * Set the new RSP value (FLAT accesses need to calculate it first) and
8014 * commit the popped register value.
8015 */
8016 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8017 }
8018#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8019
8020 if (idxGReg != X86_GREG_xSP)
8021 {
8022 /* Set the register. */
8023 if (cbMem >= sizeof(uint32_t))
8024 {
8025#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8026 AssertMsg( pReNative->idxCurCall == 0
8027 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8028 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8029 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8030#endif
8031 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8032#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8033 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8034#endif
8035#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8036 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8037 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8038#endif
8039 }
8040 else
8041 {
8042 Assert(cbMem == sizeof(uint16_t));
8043 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8044 kIemNativeGstRegUse_ForUpdate);
8045 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8046#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8047 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8048#endif
8049 iemNativeRegFreeTmp(pReNative, idxRegDst);
8050 }
8051
8052 /* Complete RSP calculation for FLAT mode. */
8053 if (idxRegEffSp == idxRegRsp)
8054 {
8055 if (cBitsFlat == 64)
8056 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8057 else
8058 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8059 }
8060 }
8061 else
8062 {
8063 /* We're popping RSP, ESP or SP. Only the 16-bit SP case requires a bit of extra work, of course. */
8064 if (cbMem == sizeof(uint64_t))
8065 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8066 else if (cbMem == sizeof(uint32_t))
8067 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8068 else
8069 {
8070 if (idxRegEffSp == idxRegRsp)
8071 {
8072 if (cBitsFlat == 64)
8073 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8074 else
8075 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8076 }
8077 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8078 }
8079 }
8080
8081#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8083#endif
8084
8085 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8086 if (idxRegEffSp != idxRegRsp)
8087 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8088 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8089
8090 return off;
8091}
8092
8093
8094
8095/*********************************************************************************************************************************
8096* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8097*********************************************************************************************************************************/
8098
8099#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8100 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8101 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8102 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8103
8104#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8105 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8106 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8107 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8108
8109#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8110 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8111 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8112 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8113
8114#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8115 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8116 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8117 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8118
8119
8120#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8121 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8122 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8123 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8124
8125#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8126 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8127 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8128 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8129
8130#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8132 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8133 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8134
8135#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8137 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8138 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8139
8140#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8141 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8142 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8143 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8144
8145
8146#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8147 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8148 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8149 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8150
8151#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8152 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8153 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8154 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8155
8156#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8157 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8158 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8159 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8160
8161#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8162 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8163 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8164 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8165
8166#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8167 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8168 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8169 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8170
8171
8172#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8173 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8174 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8175 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8176
8177#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8178 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8179 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8180 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8181#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8182 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8183 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8184 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8185
8186#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8187 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8188 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8189 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8190
8191#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8192 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8193 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8194 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8195
8196
8197#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8198 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8199 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8200 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8201
8202#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8203 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8204 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8205 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8206
8207
8208#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8209 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8210 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8211 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8212
8213#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8214 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8215 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8216 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8217
8218#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8219 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8220 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8221 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8222
8223#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8224 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8225 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8226 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8227
8228
8229
8230#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8231 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8232 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8233 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8234
8235#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8236 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8237 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8238 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8239
8240#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8241 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8242 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8243 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8244
8245#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8246 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8247 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8248 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8249
8250
8251#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8252 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8253 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8254 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8255
8256#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8257 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8258 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8259 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8260
8261#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8262 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8263 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8264 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8265
8266#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8267 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8268 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8269 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8270
8271#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8272 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8273 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8274 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8275
8276
8277#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8278 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8279 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8280 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8281
8282#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8283 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8284 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8285 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8286
8287#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8288 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8289 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8290 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8291
8292#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8293 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8294 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8295 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8296
8297#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8298 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8299 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8300 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8301
8302
8303#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8304 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8305 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8306 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8307
8308#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8309 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8310 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8311 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8312
8313#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8314 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8315 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8316 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8317
8318#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8319 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8320 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8321 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8322
8323#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8324 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8325 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8326 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8327
8328
8329#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8330 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8331 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8332 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8333
8334#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8335 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8336 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8337 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8338
8339
8340#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8341 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8342 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8343 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8344
8345#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8346 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8347 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8348 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8349
8350#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8351 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8352 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8353 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8354
8355#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8356 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8357 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8358 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8359
8360
8361DECL_INLINE_THROW(uint32_t)
8362iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8363 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8364 uintptr_t pfnFunction, uint8_t idxInstr)
8365{
8366 /*
8367 * Assert sanity.
8368 */
8369 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8370 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8371 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8372 && pVarMem->cbVar == sizeof(void *),
8373 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8374
8375 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8376 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8377 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8378 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8379 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8380
8381 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8382 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8383 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8384 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8385 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8386
8387 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8388
8389 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8390
8391#ifdef VBOX_STRICT
8392# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8393 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8394 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8395 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8396 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8397# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8398 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8399 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8400 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
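    /* For instance, IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemFlatMapDataU32)
       should resolve to (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, assuming
       IEM_ACCESS_DATA_RW sets both the read and write type bits without
       IEM_ACCESS_ATOMIC. */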
8401
8402 if (iSegReg == UINT8_MAX)
8403 {
8404 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8405 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8406 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8407 switch (cbMem)
8408 {
8409 case 1:
8410 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8411 Assert(!fAlignMaskAndCtl);
8412 break;
8413 case 2:
8414 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8415 Assert(fAlignMaskAndCtl < 2);
8416 break;
8417 case 4:
8418 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8419 Assert(fAlignMaskAndCtl < 4);
8420 break;
8421 case 8:
8422 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8423 Assert(fAlignMaskAndCtl < 8);
8424 break;
8425 case 10:
8426 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8427 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8428 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8429 Assert(fAlignMaskAndCtl < 8);
8430 break;
8431 case 16:
8432 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8433 Assert(fAlignMaskAndCtl < 16);
8434 break;
8435# if 0
8436 case 32:
8437 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8438 Assert(fAlignMaskAndCtl < 32);
8439 break;
8440 case 64:
8441 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8442 Assert(fAlignMaskAndCtl < 64);
8443 break;
8444# endif
8445 default: AssertFailed(); break;
8446 }
8447 }
8448 else
8449 {
8450 Assert(iSegReg < 6);
8451 switch (cbMem)
8452 {
8453 case 1:
8454 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8455 Assert(!fAlignMaskAndCtl);
8456 break;
8457 case 2:
8458 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8459 Assert(fAlignMaskAndCtl < 2);
8460 break;
8461 case 4:
8462 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8463 Assert(fAlignMaskAndCtl < 4);
8464 break;
8465 case 8:
8466 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8467 Assert(fAlignMaskAndCtl < 8);
8468 break;
8469 case 10:
8470 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8471 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8472 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8473 Assert(fAlignMaskAndCtl < 8);
8474 break;
8475 case 16:
8476 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8477 Assert(fAlignMaskAndCtl < 16);
8478 break;
8479# if 0
8480 case 32:
8481 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8482 Assert(fAlignMaskAndCtl < 32);
8483 break;
8484 case 64:
8485 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8486 Assert(fAlignMaskAndCtl < 64);
8487 break;
8488# endif
8489 default: AssertFailed(); break;
8490 }
8491 }
8492# undef IEM_MAP_HLP_FN
8493# undef IEM_MAP_HLP_FN_NO_AT
8494#endif
8495
8496#ifdef VBOX_STRICT
8497 /*
8498 * Check that the fExec flags we've got make sense.
8499 */
8500 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8501#endif
8502
8503 /*
8504 * To keep things simple we have to commit any pending writes first as we
8505 * may end up making calls.
8506 */
8507 off = iemNativeRegFlushPendingWrites(pReNative, off);
8508
8509#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8510 /*
8511 * Move/spill/flush stuff out of call-volatile registers.
8512 * This is the easy way out. We could contain this to the tlb-miss branch
8513 * by saving and restoring active stuff here.
8514 */
8515 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8516 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8517#endif
8518
8519 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8520 while the tlb-miss codepath will temporarily put it on the stack.
8521 Set the type to stack here so we don't need to do it twice below. */
8522 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8523 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8524 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8525 * lookup is done. */
8526
8527 /*
8528 * Define labels and allocate the result register (trying for the return
8529 * register if we can).
8530 */
8531 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8532 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8533 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8534 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8535 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8536 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8537 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8538 : UINT32_MAX;
8539//off=iemNativeEmitBrk(pReNative, off, 0);
8540 /*
8541 * Jump to the TLB lookup code.
8542 */
8543 if (!TlbState.fSkip)
8544 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8545
8546 /*
8547 * TlbMiss:
8548 *
8549 * Call helper to do the fetching.
8550 * We flush all guest register shadow copies here.
8551 */
8552 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8553
8554#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8555 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8556#else
8557 RT_NOREF(idxInstr);
8558#endif
8559
8560#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8561 /* Save variables in volatile registers. */
8562 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8563 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8564#endif
8565
8566 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8567 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8568#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8569 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8570#else
8571 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8572#endif
8573
8574 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8575 if (iSegReg != UINT8_MAX)
8576 {
8577 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8578 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8579 }
8580
8581 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8582 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8583 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
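    /* The helper writes the unmap info byte through this stack-slot pointer; it is
       loaded back into idxRegUnmapInfo after the call returns (see the
       iemNativeEmitLoadGprByBpU8 call further down). */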
8584
8585 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8586 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8587
8588 /* Done setting up parameters, make the call. */
8589 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8590
8591 /*
8592 * Put the output in the right registers.
8593 */
8594 Assert(idxRegMemResult == pVarMem->idxReg);
8595 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8596 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8597
8598#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8599 /* Restore variables and guest shadow registers to volatile registers. */
8600 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8601 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8602#endif
8603
8604 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8605 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8606
8607#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8608 if (!TlbState.fSkip)
8609 {
8610 /* End of TlbMiss - jump to the done label. */
8611 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8612 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8613
8614 /*
8615 * TlbLookup:
8616 */
8617 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8618 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8619# ifdef IEM_WITH_TLB_STATISTICS
8620 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8621 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8622# endif
8623
8624 /* [idxVarUnmapInfo] = 0; */
8625 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8626
8627 /*
8628 * TlbDone:
8629 */
8630 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8631
8632 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8633
8634# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8635 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8636 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8637# endif
8638 }
8639#else
8640 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8641#endif
8642
8643 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8644 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8645
8646 return off;
8647}
8648
8649
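/* Note: The IEM_MC_MEM_COMMIT_AND_UNMAP_* statements below all funnel into
   iemNativeEmitMemCommitAndUnmap; they only differ in the IEM_ACCESS_DATA_*
   flags and in which commit-and-unmap helper gets called when bUnmapInfo is
   non-zero. */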
8650#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8651 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8652 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8653
8654#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8655 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8656 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8657
8658#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8659 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8660 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8661
8662#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8663 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8664 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8665
8666DECL_INLINE_THROW(uint32_t)
8667iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8668 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8669{
8670 /*
8671 * Assert sanity.
8672 */
8673 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8674#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8675 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8676#endif
8677 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8678 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8679 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8680#ifdef VBOX_STRICT
8681 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8682 {
8683 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8684 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8685 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8686 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8687 case IEM_ACCESS_TYPE_WRITE:
8688 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8689 case IEM_ACCESS_TYPE_READ:
8690 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8691 default: AssertFailed();
8692 }
8693#else
8694 RT_NOREF(fAccess);
8695#endif
8696
8697 /*
8698 * To keep things simple we have to commit any pending writes first as we
8699 * may end up making calls (there shouldn't be any at this point, so this
8700 * is just for consistency).
8701 */
8702 /** @todo we could postpone this till we make the call and reload the
8703 * registers after returning from the call. Not sure if that's sensible or
8704 * not, though. */
8705 off = iemNativeRegFlushPendingWrites(pReNative, off);
8706
8707 /*
8708 * Move/spill/flush stuff out of call-volatile registers.
8709 *
8710 * We exclude any register holding the bUnmapInfo variable, as we'll be
8711 * checking it after returning from the call and will free it afterwards.
8712 */
8713 /** @todo save+restore active registers and maybe guest shadows in miss
8714 * scenario. */
8715 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8716 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8717
8718 /*
8719 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8720 * to call the unmap helper function.
8721 *
8722 * The likelihood of it being zero is higher than for the TLB hit when doing
8723 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
8724 * access should also end up with a mapping that won't need special unmapping.
8725 */
8726 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8727 * should speed up things for the pure interpreter as well when TLBs
8728 * are enabled. */
8729#ifdef RT_ARCH_AMD64
8730 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8731 {
8732 /* test byte [rbp - xxx], 0ffh */
8733 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8734 pbCodeBuf[off++] = 0xf6;
8735 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8736 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8737 pbCodeBuf[off++] = 0xff;
8738 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8739 }
8740 else
8741#endif
8742 {
8743 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8744 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8745 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8746 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8747 }
8748 uint32_t const offJmpFixup = off;
8749 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
8750
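/* Rough shape of the code emitted above and below (sketch, AMD64-style mnemonics):
 *      test    bUnmapInfo, 0ffh
 *      jz      .done               ; nothing left mapped -> skip the helper call
 *      mov     arg1, bUnmapInfo
 *      mov     arg0, pVCpu
 *      call    pfnFunction
 *  .done:
 */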
8751 /*
8752 * Call the unmap helper function.
8753 */
8754#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8755 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8756#else
8757 RT_NOREF(idxInstr);
8758#endif
8759
8760 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8761 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8762 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8763
8764 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8765 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8766
8767 /* Done setting up parameters, make the call. */
8768 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8769
8770 /* The bUnmapInfo variable is implicitly freed by these MCs. */
8771 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8772
8773 /*
8774 * Done, just fixup the jump for the non-call case.
8775 */
8776 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8777
8778 return off;
8779}
8780
8781
8782
8783/*********************************************************************************************************************************
8784* State and Exceptions *
8785*********************************************************************************************************************************/
8786
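/* All of the state actualization MCs below currently expand to the same
   iemNativeEmitPrepareFpuForUse stub; only the fForChange flag differs. */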
8787#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8788#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8789
8790#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8791#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8792#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8793
8794#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8795#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8796#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8797
8798
8799DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8800{
8801 /** @todo this needs a lot more work later. */
8802 RT_NOREF(pReNative, fForChange);
8803 return off;
8804}
8805
8806
8807
8808/*********************************************************************************************************************************
8809* Emitters for FPU related operations. *
8810*********************************************************************************************************************************/
8811
8812#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8813 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8814
8815/** Emits code for IEM_MC_FETCH_FCW. */
8816DECL_INLINE_THROW(uint32_t)
8817iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8818{
8819 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8820 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8821
8822 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8823
8824 /* Allocate a temporary FCW register. */
8825 /** @todo eliminate extra register */
8826 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8827 kIemNativeGstRegUse_ReadOnly);
8828
8829 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8830
8831 /* Free but don't flush the FCW register. */
8832 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8833 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8834
8835 return off;
8836}
8837
8838
8839#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8840 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8841
8842/** Emits code for IEM_MC_FETCH_FSW. */
8843DECL_INLINE_THROW(uint32_t)
8844iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8845{
8846 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8847 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8848
8849 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8850 /* Allocate a temporary FSW register. */
8851 /** @todo eliminate extra register */
8852 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8853 kIemNativeGstRegUse_ReadOnly);
8854
8855 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8856
8857 /* Free but don't flush the FSW register. */
8858 iemNativeRegFreeTmp(pReNative, idxFswReg);
8859 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8860
8861 return off;
8862}
8863
8864
8865
8866#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8867
8868
8869/*********************************************************************************************************************************
8870* Emitters for SSE/AVX specific operations. *
8871*********************************************************************************************************************************/
8872
8873#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8874 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8875
8876/** Emits code for IEM_MC_COPY_XREG_U128. */
8877DECL_INLINE_THROW(uint32_t)
8878iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8879{
8880 /* This is a nop if the source and destination register are the same. */
8881 if (iXRegDst != iXRegSrc)
8882 {
8883 /* Allocate destination and source register. */
8884 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8885 kIemNativeGstSimdRegLdStSz_Low128,
8886 kIemNativeGstRegUse_ForFullWrite);
8887 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8888 kIemNativeGstSimdRegLdStSz_Low128,
8889 kIemNativeGstRegUse_ReadOnly);
8890
8891 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8892
8893 /* Free but don't flush the source and destination register. */
8894 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8895 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8896 }
8897
8898 return off;
8899}
8900
8901
8902#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8903 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8904
8905/** Emits code for IEM_MC_FETCH_XREG_U128. */
8906DECL_INLINE_THROW(uint32_t)
8907iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8908{
8909 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8910 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8911
8912 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8913 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8914
8915 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8916
8917 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8918
8919 /* Free but don't flush the source register. */
8920 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8921 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8922
8923 return off;
8924}
8925
8926
8927#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8928 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8929
8930#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8931 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8932
8933/** Emits code for IEM_MC_FETCH_XREG_U64. */
8934DECL_INLINE_THROW(uint32_t)
8935iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8936{
8937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8938 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8939
8940 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8941 kIemNativeGstSimdRegLdStSz_Low128,
8942 kIemNativeGstRegUse_ReadOnly);
8943
8944 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8945 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8946
8947 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8948
8949 /* Free but don't flush the source register. */
8950 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8951 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8952
8953 return off;
8954}
8955
8956
8957#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8958 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8959
8960#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8961 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8962
8963/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8964DECL_INLINE_THROW(uint32_t)
8965iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8966{
8967 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8968 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8969
8970 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8971 kIemNativeGstSimdRegLdStSz_Low128,
8972 kIemNativeGstRegUse_ReadOnly);
8973
8974 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8975 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8976
8977 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8978
8979 /* Free but don't flush the source register. */
8980 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8981 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8982
8983 return off;
8984}
8985
8986
8987#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8988 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8989
8990/** Emits code for IEM_MC_FETCH_XREG_U16. */
8991DECL_INLINE_THROW(uint32_t)
8992iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8993{
8994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8995 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8996
8997 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8998 kIemNativeGstSimdRegLdStSz_Low128,
8999 kIemNativeGstRegUse_ReadOnly);
9000
9001 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9002 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9003
9004 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9005
9006 /* Free but don't flush the source register. */
9007 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9008 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9009
9010 return off;
9011}
9012
9013
9014#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9015 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9016
9017/** Emits code for IEM_MC_FETCH_XREG_U8. */
9018DECL_INLINE_THROW(uint32_t)
9019iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9020{
9021 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9022 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9023
9024 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9025 kIemNativeGstSimdRegLdStSz_Low128,
9026 kIemNativeGstRegUse_ReadOnly);
9027
9028 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9029 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9030
9031 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9032
9033 /* Free but don't flush the source register. */
9034 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9035 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9036
9037 return off;
9038}
9039
9040
9041#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9042 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9043
9044AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9045#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9046 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9047
9048
9049/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9050DECL_INLINE_THROW(uint32_t)
9051iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9052{
9053 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9054 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9055
9056 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9057 kIemNativeGstSimdRegLdStSz_Low128,
9058 kIemNativeGstRegUse_ForFullWrite);
9059 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9060
9061 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9062
9063 /* Free but don't flush the source register. */
9064 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9065 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9066
9067 return off;
9068}
9069
9070
9071#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9072 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9073
9074#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9075 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9076
9077#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9078 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9079
9080#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9081 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9082
9083#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9084 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9085
9086#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9087 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9088
9089 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9090DECL_INLINE_THROW(uint32_t)
9091iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9092 uint8_t cbLocal, uint8_t iElem)
9093{
9094 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9095 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9096
9097#ifdef VBOX_STRICT
9098 switch (cbLocal)
9099 {
9100 case sizeof(uint64_t): Assert(iElem < 2); break;
9101 case sizeof(uint32_t): Assert(iElem < 4); break;
9102 case sizeof(uint16_t): Assert(iElem < 8); break;
9103 case sizeof(uint8_t): Assert(iElem < 16); break;
9104 default: AssertFailed();
9105 }
9106#endif
9107
9108 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9109 kIemNativeGstSimdRegLdStSz_Low128,
9110 kIemNativeGstRegUse_ForUpdate);
9111 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9112
9113 switch (cbLocal)
9114 {
9115 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9116 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9117 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9118 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9119 default: AssertFailed();
9120 }
9121
9122 /* Free but don't flush the source register. */
9123 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9124 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9125
9126 return off;
9127}
9128
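/* Note: The IEM_MC_STORE_XREG_R32/R64 wrappers above reuse this element store
   path with iElem 0, relying on sizeof(RTFLOAT32U) == sizeof(uint32_t) and
   sizeof(RTFLOAT64U) == sizeof(uint64_t); a mismatch would trip the
   AssertFailed default in the cbLocal switch. */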
9129
9130#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9131 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9132
9133/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9134DECL_INLINE_THROW(uint32_t)
9135iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9136{
9137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9138 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9139
9140 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9141 kIemNativeGstSimdRegLdStSz_Low128,
9142 kIemNativeGstRegUse_ForUpdate);
9143 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9144
9145 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
9146 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9147 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9148
9149 /* Free but don't flush the source register. */
9150 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9151 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9152
9153 return off;
9154}
9155
9156
9157#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9158 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9159
9160/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9161DECL_INLINE_THROW(uint32_t)
9162iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9163{
9164 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9165 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9166
9167 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9168 kIemNativeGstSimdRegLdStSz_Low128,
9169 kIemNativeGstRegUse_ForUpdate);
9170 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9171
9172 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9173 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9174 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9175
9176 /* Free but don't flush the source register. */
9177 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9178 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9179
9180 return off;
9181}
9182
9183
9184#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9185 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9186
9187/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9188DECL_INLINE_THROW(uint32_t)
9189iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9190 uint8_t idxSrcVar, uint8_t iDwSrc)
9191{
9192 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9193 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9194
9195 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9196 kIemNativeGstSimdRegLdStSz_Low128,
9197 kIemNativeGstRegUse_ForUpdate);
9198 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9199
9200 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9201 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9202
9203 /* Free but don't flush the destination register. */
9204 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9205 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9206
9207 return off;
9208}
9209
9210
9211#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9212 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9213
9214/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9215DECL_INLINE_THROW(uint32_t)
9216iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9217{
9218 /*
9219 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9220 * if iYRegDst gets allocated first for the full write it won't load the
9221 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9222 * duplicated from the already allocated host register for iYRegDst containing
9223 * garbage. This would be caught by the guest register value checking in debug
9224 * builds.
9225 */
9226 if (iYRegDst != iYRegSrc)
9227 {
9228 /* Allocate destination and source register. */
9229 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9230 kIemNativeGstSimdRegLdStSz_256,
9231 kIemNativeGstRegUse_ForFullWrite);
9232 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9233 kIemNativeGstSimdRegLdStSz_Low128,
9234 kIemNativeGstRegUse_ReadOnly);
9235
9236 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9237 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9238
9239 /* Free but don't flush the source and destination register. */
9240 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9241 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9242 }
9243 else
9244 {
9245 /* This effectively only clears the upper 128-bits of the register. */
9246 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9247 kIemNativeGstSimdRegLdStSz_High128,
9248 kIemNativeGstRegUse_ForFullWrite);
9249
9250 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9251
9252 /* Free but don't flush the destination register. */
9253 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9254 }
9255
9256 return off;
9257}
9258
9259
9260#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9261 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9262
9263/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9264DECL_INLINE_THROW(uint32_t)
9265iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9266{
9267 /*
9268 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9269 * if iYRegDst gets allocated first for the full write it won't load the
9270 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9271 * duplicated from the already allocated host register for iYRegDst containing
9272 * garbage. This would be caught by the guest register value checking in debug
9273 * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper
9274 * 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
9275 */
9276 if (iYRegDst != iYRegSrc)
9277 {
9278 /* Allocate destination and source register. */
9279 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9280 kIemNativeGstSimdRegLdStSz_256,
9281 kIemNativeGstRegUse_ReadOnly);
9282 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9283 kIemNativeGstSimdRegLdStSz_256,
9284 kIemNativeGstRegUse_ForFullWrite);
9285
9286 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9287
9288 /* Free but don't flush the source and destination register. */
9289 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9290 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9291 }
9292
9293 return off;
9294}
9295
9296
9297#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9298 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9299
9300/** Emits code for IEM_MC_FETCH_YREG_U128. */
9301DECL_INLINE_THROW(uint32_t)
9302iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9303{
9304 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9305 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9306
9307 Assert(iDQWord <= 1);
9308 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9309 iDQWord == 1
9310 ? kIemNativeGstSimdRegLdStSz_High128
9311 : kIemNativeGstSimdRegLdStSz_Low128,
9312 kIemNativeGstRegUse_ReadOnly);
9313
9314 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9315 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9316
9317 if (iDQWord == 1)
9318 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9319 else
9320 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9321
9322 /* Free but don't flush the source register. */
9323 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9324 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9325
9326 return off;
9327}
9328
9329
9330#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9331 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9332
9333/** Emits code for IEM_MC_FETCH_YREG_U64. */
9334DECL_INLINE_THROW(uint32_t)
9335iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9336{
9337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9338 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9339
9340 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9341 iQWord >= 2
9342 ? kIemNativeGstSimdRegLdStSz_High128
9343 : kIemNativeGstSimdRegLdStSz_Low128,
9344 kIemNativeGstRegUse_ReadOnly);
9345
9346 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9347 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9348
9349 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9350
9351 /* Free but don't flush the source register. */
9352 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9353 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9354
9355 return off;
9356}
9357
9358
9359#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9360 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9361
9362/** Emits code for IEM_MC_FETCH_YREG_U32. */
9363DECL_INLINE_THROW(uint32_t)
9364iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9365{
9366 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9367 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9368
9369 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9370 iDWord >= 4
9371 ? kIemNativeGstSimdRegLdStSz_High128
9372 : kIemNativeGstSimdRegLdStSz_Low128,
9373 kIemNativeGstRegUse_ReadOnly);
9374
9375 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9376 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9377
9378 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9379
9380 /* Free but don't flush the source register. */
9381 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9382 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9383
9384 return off;
9385}
9386
9387
9388#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9389 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9390
9391/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9392DECL_INLINE_THROW(uint32_t)
9393iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9394{
9395 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9396 kIemNativeGstSimdRegLdStSz_High128,
9397 kIemNativeGstRegUse_ForFullWrite);
9398
9399 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9400
9401 /* Free but don't flush the register. */
9402 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9403
9404 return off;
9405}
9406
9407
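/* Note: IEM_MC_STORE_YREG_U128 below writes the 128-bit value into the selected
   half of the YMM register (a_iDQword 0 = low, 1 = high) and, since only that
   half is loaded for writing, leaves the other half untouched. */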
9408#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9409 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9410
9411/** Emits code for IEM_MC_STORE_YREG_U128. */
9412DECL_INLINE_THROW(uint32_t)
9413iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9414{
9415 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9416 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9417
9418 Assert(iDQword <= 1);
9419 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9420 iDQword == 0
9421 ? kIemNativeGstSimdRegLdStSz_Low128
9422 : kIemNativeGstSimdRegLdStSz_High128,
9423 kIemNativeGstRegUse_ForFullWrite);
9424
9425 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9426
9427 if (iDQword == 0)
9428 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9429 else
9430 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9431
9432 /* Free but don't flush the source register. */
9433 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9434 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9435
9436 return off;
9437}
9438
9439
9440#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9441 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9442
9443/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9444DECL_INLINE_THROW(uint32_t)
9445iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9446{
9447 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9448 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9449
9450 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9451 kIemNativeGstSimdRegLdStSz_256,
9452 kIemNativeGstRegUse_ForFullWrite);
9453
9454 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9455
9456 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9457 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9458
9459 /* Free but don't flush the source register. */
9460 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9461 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9462
9463 return off;
9464}
9465
9466
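/* Note: The IEM_MC_BROADCAST_XREG_*_ZX_VLMAX emitters below replicate the source
   value across the low 128 bits and zero the upper half, whereas the
   IEM_MC_BROADCAST_YREG_*_ZX_VLMAX variants further down replicate it across the
   full 256 bits. */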
9467#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9468 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9469
9470/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9471DECL_INLINE_THROW(uint32_t)
9472iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9473{
9474 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9475 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9476
9477 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9478 kIemNativeGstSimdRegLdStSz_256,
9479 kIemNativeGstRegUse_ForFullWrite);
9480
9481 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9482
9483 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9484 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9485
9486 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9487 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9488
9489 return off;
9490}
9491
9492
9493#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9494 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9495
9496/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9497DECL_INLINE_THROW(uint32_t)
9498iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9499{
9500 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9501 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9502
9503 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9504 kIemNativeGstSimdRegLdStSz_256,
9505 kIemNativeGstRegUse_ForFullWrite);
9506
9507 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9508
9509 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9510 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9511
9512 /* Free but don't flush the source register. */
9513 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9514 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9515
9516 return off;
9517}
9518
9519
9520#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9521 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9522
9523/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9524DECL_INLINE_THROW(uint32_t)
9525iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9526{
9527 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9528 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9529
9530 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9531 kIemNativeGstSimdRegLdStSz_256,
9532 kIemNativeGstRegUse_ForFullWrite);
9533
9534 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9535
9536 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9537 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9538
9539 /* Free but don't flush the source register. */
9540 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9541 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9542
9543 return off;
9544}
9545
9546
9547#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9548 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9549
9550/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9551DECL_INLINE_THROW(uint32_t)
9552iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9553{
9554 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9555 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9556
9557 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9558 kIemNativeGstSimdRegLdStSz_256,
9559 kIemNativeGstRegUse_ForFullWrite);
9560
9561 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9562
9563 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9564 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9565
9566 /* Free but don't flush the source register. */
9567 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9568 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9569
9570 return off;
9571}
9572
9573
9574#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9575 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9576
9577/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9578DECL_INLINE_THROW(uint32_t)
9579iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9580{
9581 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9582 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9583
9584 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9585 kIemNativeGstSimdRegLdStSz_256,
9586 kIemNativeGstRegUse_ForFullWrite);
9587
9588 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9589
9590 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9591
9592 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9593 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9594
9595 return off;
9596}
9597
9598
9599#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9600 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9601
9602/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9603DECL_INLINE_THROW(uint32_t)
9604iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9605{
9606 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9607 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9608
9609 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9610 kIemNativeGstSimdRegLdStSz_256,
9611 kIemNativeGstRegUse_ForFullWrite);
9612
9613 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9614
9615 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9616
9617 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9618 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9619
9620 return off;
9621}
9622
9623
9624#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9625 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9626
9627/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9628DECL_INLINE_THROW(uint32_t)
9629iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9630{
9631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9632 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9633
9634 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9635 kIemNativeGstSimdRegLdStSz_256,
9636 kIemNativeGstRegUse_ForFullWrite);
9637
9638 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9639
9640 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9641
9642 /* Free but don't flush the source register. */
9643 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9644 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9645
9646 return off;
9647}
9648
9649
9650#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9651 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9652
9653/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9654DECL_INLINE_THROW(uint32_t)
9655iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9656{
9657 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9658 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9659
9660 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9661 kIemNativeGstSimdRegLdStSz_256,
9662 kIemNativeGstRegUse_ForFullWrite);
9663
9664 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9665
9666 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9667
9668 /* Free but don't flush the source register. */
9669 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9670 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9671
9672 return off;
9673}
9674
9675
9676#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9677 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9678
9679/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9680DECL_INLINE_THROW(uint32_t)
9681iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9682{
9683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9684 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9685
9686 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9687 kIemNativeGstSimdRegLdStSz_256,
9688 kIemNativeGstRegUse_ForFullWrite);
9689
9690 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9691
9692 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9693
9694 /* Free but don't flush the source register. */
9695 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9696 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9697
9698 return off;
9699}
9700
9701
9702#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9703 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9704
9705/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9706DECL_INLINE_THROW(uint32_t)
9707iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9708{
9709 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9710 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9711
9712 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9713 kIemNativeGstSimdRegLdStSz_256,
9714 kIemNativeGstRegUse_ForFullWrite);
9715
9716 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9717
9718 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9719 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9720
9721 /* Free but don't flush the source register. */
9722 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9723 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9724
9725 return off;
9726}
9727
9728
9729#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9730 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9731
9732/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9733DECL_INLINE_THROW(uint32_t)
9734iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9735{
9736 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9737 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9738
9739 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9740 kIemNativeGstSimdRegLdStSz_256,
9741 kIemNativeGstRegUse_ForFullWrite);
9742
9743 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9744
9745 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9746 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9747
9748 /* Free but don't flush the source register. */
9749 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9750 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9751
9752 return off;
9753}
9754
9755
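/* Note: The two merge emitters below combine a 64-bit local value with the low
   half of a_iYRegSrcHx and zero-extend the result to VLMAX: U64LOCAL_U64HI puts
   the local value into qword 0 and keeps qword 1 from the source, while
   U64LO_U64LOCAL keeps qword 0 from the source and puts the local value into
   qword 1. */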
9756#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9757 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9758
9759/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9760DECL_INLINE_THROW(uint32_t)
9761iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9762{
9763 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9764 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9765
9766 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9767 kIemNativeGstSimdRegLdStSz_256,
9768 kIemNativeGstRegUse_ForFullWrite);
9769 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9770 kIemNativeGstSimdRegLdStSz_Low128,
9771 kIemNativeGstRegUse_ReadOnly);
9772 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9773
9774 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9775 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9776 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9777
9778 /* Free but don't flush the source and destination registers. */
9779 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9780 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9781 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9782
9783 return off;
9784}
9785
9786
9787#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9788 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9789
9790/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9791DECL_INLINE_THROW(uint32_t)
9792iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9793{
9794 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9795 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9796
9797 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9798 kIemNativeGstSimdRegLdStSz_256,
9799 kIemNativeGstRegUse_ForFullWrite);
9800 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9801 kIemNativeGstSimdRegLdStSz_Low128,
9802 kIemNativeGstRegUse_ReadOnly);
9803 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9804
9805 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9806 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9807 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9808
9809 /* Free but don't flush the source and destination registers. */
9810 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9811 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9812 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9813
9814 return off;
9815}
9816
9817
9818#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9819 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9820
9821
9822/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9823DECL_INLINE_THROW(uint32_t)
9824iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9825{
9826 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9827 kIemNativeGstSimdRegLdStSz_Low128,
9828 kIemNativeGstRegUse_ForUpdate);
9829
9830 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
9831 if (bImm8Mask & RT_BIT(0))
9832 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9833 if (bImm8Mask & RT_BIT(1))
9834 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9835 if (bImm8Mask & RT_BIT(2))
9836 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9837 if (bImm8Mask & RT_BIT(3))
9838 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9839
9840 /* Free but don't flush the destination register. */
9841 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9842
9843 return off;
9844}
9845
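/* Example: IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, 0x3) zeroes dwords 0 and 1 of the
   XMM register while dwords 2 and 3 are left as-is. */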
9846
9847#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9848 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9849
9850#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
9851 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
9852
9853/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
9854DECL_INLINE_THROW(uint32_t)
9855iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9856{
9857 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9858 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9859
9860 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9861 kIemNativeGstSimdRegLdStSz_256,
9862 kIemNativeGstRegUse_ReadOnly);
9863 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9864
9865 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9866
9867 /* Free but don't flush the source register. */
9868 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9869 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9870
9871 return off;
9872}
9873
9874
9875#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9876 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9877
9878#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
9879 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
9880
9881/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
9882DECL_INLINE_THROW(uint32_t)
9883iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9884{
9885 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9886 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9887
9888 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9889 kIemNativeGstSimdRegLdStSz_256,
9890 kIemNativeGstRegUse_ForFullWrite);
9891 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9892
9893 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
9894
9895 /* Free but don't flush the destination register. */
9896 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9897 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9898
9899 return off;
9900}
9901
9902
9903#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
9904 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
9905
9906
9907/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
9908DECL_INLINE_THROW(uint32_t)
9909iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
9910 uint8_t idxSrcVar, uint8_t iDwSrc)
9911{
9912 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9913 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9914
9915 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9916 iDwDst < 4
9917 ? kIemNativeGstSimdRegLdStSz_Low128
9918 : kIemNativeGstSimdRegLdStSz_High128,
9919 kIemNativeGstRegUse_ForUpdate);
9920 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9921 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9922
9923 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
9924 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
9925
9926 /* Free but don't flush the destination and temporary registers. */
9927 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9928 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9929 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9930
9931 return off;
9932}
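
/*
 * Rough semantic sketch of IEM_MC_STORE_YREG_U32_U256 as emitted above: a
 * single dword is copied from the 256-bit local into the destination YMM
 * register, going through a temporary GPR (guest state view, not the actual
 * host code; names are illustrative):
 *
 *      pu256Dst->au32[iDwDst] = pu256Src->au32[iDwSrc];   // other dwords unchanged
 */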
9933
9934
9935#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
9936 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
9937
9938
9939/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
9940DECL_INLINE_THROW(uint32_t)
9941iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
9942 uint8_t idxSrcVar, uint8_t iQwSrc)
9943{
9944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9945 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9946
9947 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9948 iQwDst < 2
9949 ? kIemNativeGstSimdRegLdStSz_Low128
9950 : kIemNativeGstSimdRegLdStSz_High128,
9951 kIemNativeGstRegUse_ForUpdate);
9952 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9953 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9954
9955 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
9956 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
9957
9958 /* Free but don't flush the destination and temporary registers. */
9959 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9960 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9961 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9962
9963 return off;
9964}
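
/*
 * Same pattern as the U32 variant above, just on qword granularity (guest
 * state view, illustrative names):
 *
 *      pu256Dst->au64[iQwDst] = pu256Src->au64[iQwSrc];   // other qwords unchanged
 */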
9965
9966
9967#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
9968 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
9969
9970
9971/** Emits code for IEM_MC_STORE_YREG_U64. */
9972DECL_INLINE_THROW(uint32_t)
9973iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
9974{
9975 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9976 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9977
9978 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9979 iQwDst < 2
9980 ? kIemNativeGstSimdRegLdStSz_Low128
9981 : kIemNativeGstSimdRegLdStSz_High128,
9982 kIemNativeGstRegUse_ForUpdate);
9983
9984 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9985
9986 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
9987
9988 /* Free but don't flush the destination register. */
9989 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9990 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9991
9992 return off;
9993}
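
/*
 * Note the difference from the ZX_VLMAX stores above: only the addressed
 * qword is modified and the rest of the register is left untouched (the
 * guest register is allocated ForUpdate rather than ForFullWrite). Guest
 * state view, illustrative names only:
 *
 *      pu256Dst->au64[iQwDst] = u64Value;   // other qwords unchanged, no zero extension
 */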
9994
9995
9996#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
9997 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
9998
9999/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10000DECL_INLINE_THROW(uint32_t)
10001iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10002{
10003 RT_NOREF(pReNative, iYReg);
10004 /** @todo Needs to be implemented when support for AVX-512 is added. */
10005 return off;
10006}
10007
10008
10009
10010/*********************************************************************************************************************************
10011* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10012*********************************************************************************************************************************/
10013
10014/**
10015 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10016 */
10017DECL_INLINE_THROW(uint32_t)
10018iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
10019{
10020 /* Grab the MXCSR register; it must not end up in a call-volatile host register, or we would free it when setting up the call below. */
10021 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10022 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10023 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10024
10025 /*
10026 * Need to do the FPU preparation.
10027 */
10028 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10029
10030 /*
10031 * Do all the call setup and cleanup.
10032 */
10033 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10034 false /*fFlushPendingWrites*/);
10035
10036 /*
10037 * Load the MXCSR register into the first argument and mask out the current exception flags.
10038 */
10039 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10040 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10041
10042 /*
10043 * Make the call.
10044 */
10045 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10046
10047 /*
10048 * The updated MXCSR is in the return register.
10049 */
10050 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
10051
10052#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10053 /* Write back the MXCSR register value (there is no delayed write-back for such registers at the moment). */
10054 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10055#endif
10056 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10057
10058 return off;
10059}
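
/*
 * Rough sketch of the contract this worker sets up with the AImpl helper, as
 * far as the code above shows it (the prototype below is illustrative only,
 * not an exact declaration; uGstMxcsr stands for the guest MXCSR value):
 *
 *      // uint32_t pfnAImpl(uint32_t fMxcsrIn, <the cArgs explicit arguments>);
 *      uint32_t const fMxcsrIn  = uGstMxcsr & ~X86_MXCSR_XCPT_FLAGS; // first argument, exception flags cleared
 *      uint32_t const fMxcsrOut = pfnAImpl(fMxcsrIn, ...);           // helper returns the updated MXCSR
 *      uGstMxcsr = fMxcsrOut;                                        // written back to the guest context
 */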
10060
10061
10062#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10063 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10064
10065/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10066DECL_INLINE_THROW(uint32_t)
10067iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10068{
10069 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10070 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10071 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
10072}
10073
10074
10075#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10076 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10077
10078/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10079DECL_INLINE_THROW(uint32_t)
10080iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl,
10081 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10082{
10083 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10084 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10085 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10086 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
10087}
10088
10089
10090/*********************************************************************************************************************************
10091* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10092*********************************************************************************************************************************/
10093
10094#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10095 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10096
10097/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10098DECL_INLINE_THROW(uint32_t)
10099iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10100{
10101 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10102 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10103 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
10104}
10105
10106
10107#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10108 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10109
10110/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
10111DECL_INLINE_THROW(uint32_t)
10112iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl,
10113 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10114{
10115 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10116 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10117 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10118 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
10119}
10120
10121
10122#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10123
10124
10125/*********************************************************************************************************************************
10126* Include instruction emitters. *
10127*********************************************************************************************************************************/
10128#include "target-x86/IEMAllN8veEmit-x86.h"
10129